In [2]:
import os

In [9]:
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process
# environment, so the connection string never has to be written in the notebook.
load_dotenv()

# NOTE: the constant keeps its original (misspelled) name because later cells
# reference it; the environment variable that is read is MONGODB_URL.
MONOGODB_CONNECTION_URL = os.getenv("MONGODB_URL")
if not MONOGODB_CONNECTION_URL:
    # Fail fast: with no host given, MongoClient falls back to its default
    # (localhost), which hides the misconfiguration until a query times out.
    raise RuntimeError(
        "MONGODB_URL is not set; define it in the environment or in a .env file"
    )

# Target database and collection for the mushroom dataset.
DATABASE_NAME = "MUSHROOM_DB"
COLLECTION_NAME = "mushroom_data"

In [11]:
from pymongo import MongoClient
import certifi

# Path to certifi's CA bundle, handed to the driver for TLS verification.
ca = certifi.where()

# Open the connection, then resolve the database and collection handles by name.
client = MongoClient(MONOGODB_CONNECTION_URL, tlsCAFile=ca)
database = client.get_database(DATABASE_NAME)
collection = database.get_collection(COLLECTION_NAME)

In [14]:
import pandas as pd

In [28]:
# Read the raw mushroom dataset from the CSV file in the working directory.
df = pd.read_csv("mushrooms.csv")

In [29]:
# Dimensions of the CSV frame as (rows, columns).
df.shape

(8124, 23)

In [30]:
# Column labels of the CSV frame.
df.columns

Index(['class', 'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor',
       'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
       'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',
       'stalk-surface-below-ring', 'stalk-color-above-ring',
       'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number',
       'ring-type', 'spore-print-color', 'population', 'habitat'],
      dtype='object')

In [31]:
# Per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8124 entries, 0 to 8123
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   class                     8124 non-null   object
 1   cap-shape                 8124 non-null   object
 2   cap-surface               8124 non-null   object
 3   cap-color                 8124 non-null   object
 4   bruises                   8124 non-null   object
 5   odor                      8124 non-null   object
 6   gill-attachment           8124 non-null   object
 7   gill-spacing              8124 non-null   object
 8   gill-size                 8124 non-null   object
 9   gill-color                8124 non-null   object
 10  stalk-shape               8124 non-null   object
 11  stalk-root                8124 non-null   object
 12  stalk-surface-above-ring  8124 non-null   object
 13  stalk-surface-below-ring  8124 non-null   object
 14  stalk-color-above-ring  

In [32]:
# Count missing values in each column.
df.isna().sum()

class                       0
cap-shape                   0
cap-surface                 0
cap-color                   0
bruises                     0
odor                        0
gill-attachment             0
gill-spacing                0
gill-size                   0
gill-color                  0
stalk-shape                 0
stalk-root                  0
stalk-surface-above-ring    0
stalk-surface-below-ring    0
stalk-color-above-ring      0
stalk-color-below-ring      0
veil-type                   0
veil-color                  0
ring-number                 0
ring-type                   0
spore-print-color           0
population                  0
habitat                     0
dtype: int64

In [34]:
# Convert the frame to a list with one {column: value} dict per row,
# which is the shape passed to insert_many below.
data = df.to_dict(orient="records")

In [35]:
# Number of record dicts; expected to equal the row count of df.
len(data)

8124

In [36]:
# Inspect the first record to confirm the column -> value layout.
data[0]

{'class': 'p',
 'cap-shape': 'x',
 'cap-surface': 's',
 'cap-color': 'n',
 'bruises': 't',
 'odor': 'p',
 'gill-attachment': 'f',
 'gill-spacing': 'c',
 'gill-size': 'n',
 'gill-color': 'k',
 'stalk-shape': 'e',
 'stalk-root': 'e',
 'stalk-surface-above-ring': 's',
 'stalk-surface-below-ring': 's',
 'stalk-color-above-ring': 'w',
 'stalk-color-below-ring': 'w',
 'veil-type': 'p',
 'veil-color': 'w',
 'ring-number': 'o',
 'ring-type': 'p',
 'spore-print-color': 'k',
 'population': 's',
 'habitat': 'u'}

In [37]:
# Insert the records into the MongoDB collection.
#
# The insert is guarded so that re-running this cell (or Restart & Run All)
# does not load a second copy of the dataset. insert_many also adds an "_id"
# key to every dict in `data` in place, so an unguarded re-run with the same
# list would fail with duplicate-key errors.
if collection.count_documents({}, limit=1) == 0:
    rec = collection.insert_many(data)
else:
    # Collection already holds documents; nothing is inserted on this run.
    rec = None

In [38]:
# Retrieve records from MongoDB: an empty filter matches every document.
# The call returns a Cursor object rather than a list of documents.
records = collection.find({})

In [39]:
# Displaying the variable shows the pymongo Cursor object itself,
# not the documents it will yield.
records

<pymongo.cursor.Cursor at 0x7f4b0dd4c7f0>

In [40]:
# Print the first five documents from the cursor, each prefixed with its
# position; pairing with range(5) stops the iteration after five items.
for idx, doc in zip(range(5), records):
    print(f"{idx} - {doc}")

0 - {'_id': ObjectId('65e225c3bcd6a05f1079aee7'), 'class': 'p', 'cap-shape': 'x', 'cap-surface': 's', 'cap-color': 'n', 'bruises': 't', 'odor': 'p', 'gill-attachment': 'f', 'gill-spacing': 'c', 'gill-size': 'n', 'gill-color': 'k', 'stalk-shape': 'e', 'stalk-root': 'e', 'stalk-surface-above-ring': 's', 'stalk-surface-below-ring': 's', 'stalk-color-above-ring': 'w', 'stalk-color-below-ring': 'w', 'veil-type': 'p', 'veil-color': 'w', 'ring-number': 'o', 'ring-type': 'p', 'spore-print-color': 'k', 'population': 's', 'habitat': 'u'}
1 - {'_id': ObjectId('65e225c3bcd6a05f1079aee8'), 'class': 'e', 'cap-shape': 'x', 'cap-surface': 's', 'cap-color': 'y', 'bruises': 't', 'odor': 'a', 'gill-attachment': 'f', 'gill-spacing': 'c', 'gill-size': 'b', 'gill-color': 'k', 'stalk-shape': 'e', 'stalk-root': 'c', 'stalk-surface-above-ring': 's', 'stalk-surface-below-ring': 's', 'stalk-color-above-ring': 'w', 'stalk-color-below-ring': 'w', 'veil-type': 'p', 'veil-color': 'w', 'ring-number': 'o', 'ring-type'

In [41]:
# Run a fresh query for every document and materialise the results into a
# DataFrame (a new cursor is used rather than the one iterated earlier).
dataframe = pd.DataFrame(list(collection.find({})))

In [42]:
# Shape after the MongoDB round trip; there is one more column than in the
# CSV frame because each stored document carries an "_id" field.
dataframe.shape

(8124, 24)

In [43]:
# Preview the first rows of the frame built from the MongoDB documents.
dataframe.head()

Unnamed: 0,_id,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,65e225c3bcd6a05f1079aee7,p,x,s,n,t,p,f,c,n,...,s,w,w,p,w,o,p,k,s,u
1,65e225c3bcd6a05f1079aee8,e,x,s,y,t,a,f,c,b,...,s,w,w,p,w,o,p,n,n,g
2,65e225c3bcd6a05f1079aee9,e,b,s,w,t,l,f,c,b,...,s,w,w,p,w,o,p,n,n,m
3,65e225c3bcd6a05f1079aeea,p,x,y,w,t,p,f,c,n,...,s,w,w,p,w,o,p,k,s,u
4,65e225c3bcd6a05f1079aeeb,e,x,s,g,f,n,f,w,b,...,s,w,w,p,w,o,e,n,a,g


In [44]:
# Remove the "_id" column that MongoDB attached to each document so the frame
# has the same columns as the original CSV. errors="ignore" makes this a no-op
# when the column is already gone, so the cell can be re-run safely without a
# separate membership check (and without mixing `columns=` with `axis=`).
dataframe = dataframe.drop(columns=["_id"], errors="ignore")

In [46]:
# Preview the first rows of the CSV frame.
# NOTE(review): this shows `df` (loaded from the CSV), not `dataframe`, which
# is the frame the previous cell modified. If the intent was to confirm that
# "_id" was dropped, this should probably be `dataframe.head()` - confirm.
df.head()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g
