In [None]:
import numpy as np 
import pandas as pd

import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Load the dataset CSV into a DataFrame; the path is relative to the notebook's working directory.
df = pd.read_csv('../input/vegetable-market/Vegetable_market.csv') 

In [None]:
# Preview the first rows of the loaded data.
df.head()

In [None]:
# Number of distinct values (missing values counted as a value) in each object-dtype column.
{name: values.nunique(dropna=False) for name, values in df.select_dtypes('object').items()}

In [None]:
# Frequency of each distinct value in the Month column.
df['Month'].value_counts()

In [None]:
# Count of missing (NaN) entries in each column.
df.isna().sum()

In [None]:
def onehot_encode(df, columns):
    """Replace the given columns with one-hot indicator columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified; a new frame is returned.
    columns : list-like of column labels, or a single column label
        Columns to encode. Each one is removed from the result and replaced
        by indicator columns named ``<column>_<value>``.

    Returns
    -------
    pandas.DataFrame
        The untouched columns of ``df`` (in their original order) followed by
        the indicator columns for each encoded column, in the order given.
    """
    # Accept a single label as well as a list of labels.
    columns = list(columns) if pd.api.types.is_list_like(columns) else [columns]

    # Passing `columns=` explicitly makes get_dummies encode every requested
    # column. Without it only object/category columns are encoded, so a
    # numeric column in `columns` would trigger a prefix-length error.
    return pd.get_dummies(df, columns=columns, prefix=columns)
    

In [None]:
def preprocess_inputs(df, train_size=0.7, random_state=1):
    """Clean, encode, split and scale the data for modelling.

    Steps, in order:
      1. Blank (``' '``) entries in ``Month`` are turned into NaN, then every
         row containing a missing value in any column is dropped.
      2. ``'Deasaster Happen in last 3month'`` is mapped from no/yes to 0/1 and
         the categorical columns are one-hot encoded.
      3. ``'Price per kg'`` is separated as the target and the data is split
         into train and test sets.
      4. Features are standardised with a scaler fitted on the training set
         only, so no test-set statistics leak into training.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data. It is copied, so the caller's frame is not modified.
    train_size : float, default 0.7
        Forwarded to ``train_test_split``.
    random_state : int, default 1
        Forwarded to ``train_test_split`` for a reproducible split.

    Returns
    -------
    X_train, X_test : pandas.DataFrame
        Scaled feature frames. They keep the row index of the split they came
        from, so they stay aligned with ``y_train`` / ``y_test``.
    y_train, y_test : pandas.Series
        Target values for the corresponding rows.
    """
    df = df.copy()

    # Treat blank Month entries as missing values. The value-to-NaN mapping
    # must be given as (to_replace, value); a set literal such as
    # {' ', np.nan} is not a mapping and does not do this.
    df['Month'] = df['Month'].replace(' ', np.nan)
    df = df.dropna()

    # Encode the binary flag and the categorical columns
    df['Deasaster Happen in last 3month'] = df['Deasaster Happen in last 3month'].replace({'no': 0, 'yes': 1})
    df = onehot_encode(df, columns=['Vegetable', 'Season', 'Month', 'Vegetable condition'])

    # Separate features and target, then split
    X = df.drop('Price per kg', axis=1)
    y = df['Price per kg']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size, random_state=random_state
    )

    # Scale using statistics from the training set only
    sc = StandardScaler()
    sc.fit(X_train)

    # Preserve each split's index so the features remain aligned with the targets.
    X_train = pd.DataFrame(sc.transform(X_train), index=X_train.index, columns=X.columns)
    X_test = pd.DataFrame(sc.transform(X_test), index=X_test.index, columns=X.columns)

    return X_train, X_test, y_train, y_test
    

In [None]:
# Distinct values present in the 'Vegetable condition' column.
df['Vegetable condition'].unique()

In [None]:
 # Run the preprocessing on the loaded frame to get the train/test features and targets.
 X_train, X_test,y_train,y_test = preprocess_inputs(df)

In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor

In [None]:
# Candidate regressors, keyed by the short label used when reporting results.
# Every estimator with a stochastic component is given a fixed seed so the
# scores are reproducible after a kernel restart.
models = {
    'lr': LinearRegression(),
    'lr_l2': Ridge(),
    'lr_l1': Lasso(),
    'Knn': KNeighborsRegressor(),
    'MLP': MLPRegressor(random_state=1),
    'RF': RandomForestRegressor(random_state=1),
    'GB': GradientBoostingRegressor(random_state=1),
    'XGB': XGBRegressor(random_state=1),
    # verbose=0 suppresses CatBoost's per-iteration training log.
    'cat': CatBoostRegressor(random_state=1, verbose=0),
}

In [None]:
# Fit each candidate regressor on the training split, reporting progress as it finishes.
for name, model in models.items():
    model.fit(X_train, y_train)
    print(f'{name} trained')

In [None]:
# Report each fitted model's R^2 on the held-out test split.
for name, model in models.items():
    # Keep a space between the label and the metric name so the output
    # reads "lr R^2 score: ..." rather than "lrR^2 score: ...".
    print(f'{name} R^2 score: {model.score(X_test, y_test):.5f}')