<a href="https://colab.research.google.com/github/mkri/master/blob/master/modeling_price.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Building a Fast.ai Model

In [1]:
# Capture the current working directory (it is displayed in the next cell)
import os
wd = os.getcwd()

In [2]:
# Display the working directory captured above
wd

'/content'

In [3]:
# Setting the home directory for the project: the parent of the working directory.
# os.path.dirname is used instead of slicing a fixed number of characters off the
# path, so the result is still the parent directory when the working directory's
# last component is not exactly 'content' (for '/content' it is still '/').
home_dir = os.path.dirname(wd)

In [4]:
# Display the home directory derived from the working directory
home_dir

'/'

In [5]:
# Print a heading followed by the directory contents, first for the home
# directory and then for the working directory.
for heading, directory in (('HOME DIRECTORY', home_dir),
                           ('WORKING DIRECTORY', wd)):
    print(heading)
    print(os.listdir(directory))

HOME DIRECTORY
['run', 'proc', 'lib', 'srv', 'root', 'sys', 'var', 'media', 'sbin', 'mnt', 'etc', 'boot', 'opt', 'home', 'bin', 'tmp', 'usr', 'lib64', 'dev', 'content', '.dockerenv', 'datalab', 'tools', 'swift', 'tensorflow-1.15.2', 'lib32']
WORKING DIRECTORY
['.config', 'datafileNew.csv', 'sample_data']


## Loading the Dataset

In [7]:
import pandas as pd
import numpy as np

# Read the price data from the working directory. os.path.join builds the file
# path instead of concatenating strings with a hard-coded '/' separator
# (os is imported in the first cell of the notebook).
training_set = pd.read_csv(os.path.join(wd, 'datafileNew.csv'))

In [8]:
# Preview the first five rows of the loaded data
training_set.head(5)

Unnamed: 0,Crop,Year,Month,Price,Location
0,Rice,2020,Jan,1.8,Mumbai
1,Wheat,2020,Feb,1.2,Pune
2,Coarse Cereals,2020,Mar,1.6,Chennai
3,Pulses,2020,Apr,0.98,Chennai
4,Vegetables,2020,May,1.6,Coimbatore


### Selecting a Few Features

In [9]:
# Columns kept for modeling; the 'Unnamed: 0' column seen in head() is left out
cols = [
    'Crop',
    'Year',
    'Month',
    'Price',
    'Location',
]

In [11]:
# Keep only the selected columns. The explicit .copy() gives train_d its own
# data, so any later in-place modification of train_d cannot affect
# training_set or trigger pandas' SettingWithCopyWarning.
train_d = training_set[cols].copy()

In [12]:
# Summarize the selected columns: dtypes, non-null counts and memory usage
train_d.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 179 entries, 0 to 178
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Crop      179 non-null    object 
 1   Year      179 non-null    int64  
 2   Month     179 non-null    object 
 3   Price     179 non-null    float64
 4   Location  179 non-null    object 
dtypes: float64(1), int64(1), object(3)
memory usage: 7.1+ KB


## Modeling With Fast.ai


In [14]:
# Star import of the fastai tabular module; presumably the source of the
# TabularList, FillMissing, Categorify, Normalize, tabular_learner, rmse and
# DatasetType names used in the cells below (none are defined in this notebook).
from fastai.tabular import *
#This path will be used for saving and exporting the model
path = wd

In [15]:
# Target column the model learns to predict
dep_var = 'Price'

# Categorical features: every column of train_d with object dtype
cat_names = train_d.select_dtypes(include='object').columns.tolist()

# Continuous features: none are used; the dependent variable must not be listed here.
# NOTE(review): 'Year' (int64) is in neither cat_names nor cont_names, so it is
# not passed to the model as a feature - confirm this is intended.
cont_names = list()

# Preprocessing steps handed to the fastai data pipeline
procs = [FillMissing, Categorify, Normalize]

In [16]:
# Hold-out rows that the next cell attaches to the data via add_test().
# The previous fixed slice iloc[800:1000] selected no rows at all, because the
# frame has far fewer than 800 rows (info() reports 179). The slice is now derived
# from the frame's length: the last 20% of the rows, i.e. the same rows the next
# cell holds out for validation with split_by_idx.
holdout_start = len(train_d) - int(len(train_d) * 0.2)
val = TabularList.from_df(train_d.iloc[holdout_start:].copy(), path=path, cat_names=cat_names, cont_names=cont_names)

In [17]:
# Creating the training DataBunch: the last 20% of the rows are held out for
# validation, 'Price' is the label, and `val` is attached as the test set.
n_rows = len(train_d)
valid_idx = list(range(n_rows - int(n_rows * 0.2), n_rows))

tabular_items = TabularList.from_df(train_d, path=path, cat_names=cat_names,
                                    cont_names=cont_names, procs=procs)
data = (tabular_items
        .split_by_idx(valid_idx)
        .label_from_df(cols=dep_var)
        .add_test(val)
        .databunch())

In [18]:
# Show five sample rows from the data, with their categorical features and target
data.show_batch(5)

Crop,Month,Location,target
Coarse Cereals,Mar,Pune,1.8
Wheat,Dec,Coimbatore,2.0
All Agriculture,Jan,Jaipur,2.35
Sugarcane,Dec,Mumbai,4.0
Fruits,Aug,Kolkata,2.35


### Initializing the Neural Network

In [19]:
# Build the tabular learner with layer sizes 300, 100, 100 and 50,
# reporting root mean squared error as the metric.
hidden_layers = [300, 100, 100, 50]
learn = tabular_learner(data, layers=hidden_layers, metrics=rmse)

### Training The Model

In [20]:
# Train for 25 epochs with a learning rate of 1e-2.
# NOTE(review): the saved output below lists only epochs 0-9, which appears not
# to match this 25-epoch call; re-run the cell to refresh the output.
learn.fit(25, 1e-2)

epoch,train_loss,valid_loss,root_mean_squared_error,time
0,8.30083,7.51022,2.740478,00:00
1,8.146402,7.109219,2.666312,00:00
2,7.727511,6.622099,2.573344,00:00
3,7.320852,6.160753,2.482086,00:00
4,6.925292,5.732238,2.394209,00:00
5,6.547284,5.177146,2.275334,00:00
6,6.177205,4.743225,2.177895,00:00
7,5.758797,4.245255,2.060402,00:00
8,5.379653,3.016415,1.736783,00:00
9,4.987915,2.150951,1.466612,00:00


In [21]:
# Compare targets with predictions on sample rows from the training set
learn.show_results(ds_type=DatasetType.Train)

Crop,Month,Location,target,prediction
Sugarcane,Feb,Pune,1.98,[1.722399]
"Eggs, Fish and Meat",Oct,Hyderabad,4.5,[2.727291]
Vegetables,Oct,Mumbai,1.6,[1.682112]
Milk,Aug,Kochi,1.6,[1.70914]
"Eggs, Fish and Meat",May,Hyderabad,4.0,[2.524937]


In [23]:
# Compare targets with predictions on sample rows from the validation set
learn.show_results(ds_type=DatasetType.Valid)

Crop,Month,Location,target,prediction
Oilseeds,Jan,Chennai,1.7,[2.556709]
Sugarcane,Feb,Kochi,2.0,[2.022837]
Fibers,Mar,Kolkata,2.35,[2.842332]
All Agriculture,Apr,Chennai,6.0,[3.034599]
Wheat,May,Coimbatore,1.8,[1.898244]


## Saving & Exporting The Model

In [24]:
# Save the model under the name 'model'; return_path=True makes the call return
# the path of the written file (displayed in the output below)
learn.save('model',return_path=True)

PosixPath('/content/models/model.pth')

In [25]:
# Export the learner to 'model.pkl' (presumably under `path`, which was set up
# earlier for saving and exporting - confirm the location)
learn.export('model.pkl')