In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the raw avocado data; the path is relative to the notebook's working directory.
avocado = pd.read_csv('../input/avocado.csv')

In [None]:
# (rows, columns) of the frame as loaded.
avocado.shape

In [None]:
# Preview the first rows to see what each column holds.
avocado.head()

In [None]:
# List the column names; 'Unnamed: 0' is removed in the next cell.
avocado.columns

In [None]:
# Remove the 'Unnamed: 0' column (presumably a leftover row counter from the CSV export — confirm).
avocado = avocado.drop(columns='Unnamed: 0')

In [None]:
# Summary statistics (count, mean, spread, quartiles) for the numeric columns.
avocado.describe()

In [None]:
# Column dtypes and non-null counts.
avocado.info()

In [None]:
def low_cardinality_cols(dataframe, max_unique=55):
    """Return the names of text columns that have few distinct values.

    A column qualifies when it holds text (``object`` dtype, or pandas'
    dedicated string dtype) and has fewer than ``max_unique`` distinct
    non-null values.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are inspected.
    max_unique : int, default 55
        Exclusive upper bound on the number of distinct values a column may
        have and still count as low-cardinality.

    Returns
    -------
    list
        Qualifying column names, in the frame's column order.
    """
    low_card_cols = []
    for cname in dataframe.columns:
        column = dataframe[cname]
        # Test the dtype first so nunique() is never computed for numeric
        # columns. StringDtype is accepted as well, otherwise text columns
        # stored with pandas' string dtype would be skipped silently.
        is_text = column.dtype == 'object' or isinstance(column.dtype, pd.StringDtype)
        if is_text and column.nunique() < max_unique:
            low_card_cols.append(cname)
    return low_card_cols
                                                              

In [None]:
# Object-dtype columns with fewer than 55 distinct values.
low_cardinality_cols(avocado)

In [None]:
def cols_with_missing_values(dataframe):
    """List the columns of ``dataframe`` that contain at least one null.

    Column names are returned in the frame's own column order; an empty
    list means no column has a missing value.
    """
    incomplete = []
    for column in dataframe.columns:
        has_gap = dataframe[column].isna().any()
        if has_gap:
            incomplete.append(column)
    return incomplete

In [None]:
# Columns that contain at least one missing value.
cols_with_missing_values(avocado)

In [None]:
# The region values include a 'TotalUS' total entry; those rows are filtered out in a later cell.
avocado['region'].unique()

In [None]:
# Peek at the rows whose region is 'TotalUS'.
avocado.loc[avocado['region'].eq('TotalUS')].head()

In [None]:
# Discard the 'TotalUS' rows. The explicit .copy() makes the result an
# independent frame, so the column assignments and drops in later cells
# modify it directly instead of triggering pandas' SettingWithCopyWarning.
avocado = avocado.loc[avocado['region'] != 'TotalUS'].copy()

In [None]:
# Check that the TotalUS records are gone: the next cell should return an empty frame.

In [None]:
# Displays an empty frame if the 'TotalUS' rows were removed.
avocado.loc[avocado['region'].eq('TotalUS')]

In [None]:
# Copy the numeric-code columns under readable names.
# The original code columns are removed in a later cell.
for readable_name, code_column in (
    ('small Hass', '4046'),
    ('large Hass', '4225'),
    ('extra large Hass', '4770'),
):
    avocado[readable_name] = avocado[code_column]

In [None]:
# Confirm the three readable size columns were appended.
avocado.columns

In [None]:
# Drop the original numeric-code columns now that readable copies exist.
avocado = avocado.drop(columns=['4046', '4225', '4770'])

In [None]:
# Confirm the '4046', '4225' and '4770' columns are gone.
avocado.columns

In [None]:
# One-hot encode the region column: one indicator column per distinct region,
# indexed like `avocado` so it can be joined back on the index.
region_dummies =   pd.get_dummies(data=avocado['region'])

In [None]:
# One-hot encode the year column, like the region column.
# The prefix gives the indicator columns string names ('year_<value>').
# Without it they are named by the raw year values (integers if the column
# is numeric), so the feature matrix ends up with mixed string/integer
# column names, which recent scikit-learn versions refuse to fit on.
year_dummies = pd.get_dummies(data=avocado['year'], prefix='year')

In [None]:
# Join the region indicator columns onto the main frame by index label.
# Passing the dummies' index through `on=` appears to leave a helper column
# named 'key_0' in the result, which the next cell drops.
avocado =   avocado.join(other=region_dummies,on=region_dummies.index,how='inner')

In [None]:
# Remove the 'key_0' helper column left behind by the previous join.
avocado = avocado.drop(columns='key_0')

In [None]:
# Attach the year indicator columns the same way as the region ones.
# NOTE: the 'key_0' column that results from this join is dropped when X is built.
avocado  = avocado.join(other=year_dummies,on=year_dummies.index,how='inner')

In [None]:
# Shape after both joins: the column count should have grown by the number
# of region and year indicator columns.
avocado.shape

In [None]:
# Column list after the joins, now including the region and year indicator columns.
avocado.columns

In [None]:
# Build the feature matrix X and the target y.
# Excluded from X:
#   - 'key_0': helper column left over from the last join
#   - 'Total Volume', 'Total Bags': presumably aggregates of other columns — confirm
#   - 'Date': not used as a feature
#   - 'year', 'region': represented by their indicator columns instead
#   - 'type': the label being predicted
excluded_columns = [
    'key_0',
    'Total Volume',
    'Total Bags',
    'Date',
    'year',
    'type',
    'region',
]
X = avocado.drop(columns=excluded_columns)
y = avocado['type']

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 33% of the rows for evaluation; random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=101)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression with scikit-learn's default settings.
# NOTE(review): the features are not scaled before fitting — confirm the solver
# converges (watch for a ConvergenceWarning when fitting).
lr = LogisticRegression()

In [None]:
# Fit the classifier on the training split.
lr.fit(X_train,y_train)

In [None]:
# Predict the avocado type for the held-out rows.
predictions = lr.predict(X_test)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Confusion matrix comparing the true test labels with the predictions.
print(confusion_matrix(y_test,predictions))

In [None]:
# Text report of per-class classification metrics on the test split.
print(classification_report(y_test,predictions))