<a href="https://colab.research.google.com/github/oduoranto/Machine-Learning/blob/main/Rice_Prices_Prediction_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [120]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import statsmodels.api as sm
from sklearn.preprocessing import MinMaxScaler, StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn import metrics

In [84]:
food_prices = pd.read_csv('/content/wfp_food_prices_ken (1).csv')

In [85]:
food_prices.head()

Unnamed: 0,date,admin1,admin2,market,latitude,longitude,category,commodity,unit,priceflag,pricetype,currency,price,usdprice
0,#date,#adm1+name,#adm2+name,#loc+market+name,#geo+lat,#geo+lon,#item+type,#item+name,#item+unit,#item+price+flag,#item+price+type,#currency,#value,#value+usd
1,2006-01-15,Coast,Mombasa,Mombasa,-4.05,39.666667,cereals and tubers,Maize,KG,actual,Wholesale,KES,16.13,0.2235
2,2006-01-15,Coast,Mombasa,Mombasa,-4.05,39.666667,cereals and tubers,Maize (white),90 KG,actual,Wholesale,KES,1480.0,20.5041
3,2006-01-15,Coast,Mombasa,Mombasa,-4.05,39.666667,pulses and nuts,Beans (dry),90 KG,actual,Wholesale,KES,3246.0,44.9705
4,2006-01-15,Eastern,Kitui,Kitui,-1.366667,38.016667,cereals and tubers,Potatoes (Irish),50 KG,actual,Wholesale,KES,1249.99,17.3175


In [86]:
food_prices.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10648 entries, 0 to 10647
Data columns (total 14 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   date       10648 non-null  object
 1   admin1     10648 non-null  object
 2   admin2     10648 non-null  object
 3   market     10648 non-null  object
 4   latitude   10648 non-null  object
 5   longitude  10648 non-null  object
 6   category   10648 non-null  object
 7   commodity  10648 non-null  object
 8   unit       10648 non-null  object
 9   priceflag  10648 non-null  object
 10  pricetype  10648 non-null  object
 11  currency   10648 non-null  object
 12  price      10648 non-null  object
 13  usdprice   10648 non-null  object
dtypes: object(14)
memory usage: 1.1+ MB


In [87]:
food_prices.columns

Index(['date', 'admin1', 'admin2', 'market', 'latitude', 'longitude',
       'category', 'commodity', 'unit', 'priceflag', 'pricetype', 'currency',
       'price', 'usdprice'],
      dtype='object')

In [88]:
print(food_prices['commodity'].unique()) if 'commodity' in food_prices.columns else print("'commodity' column not found in the DataFrame.")

['#item+name' 'Maize' 'Maize (white)' 'Beans (dry)' 'Potatoes (Irish)'
 'Sorghum' 'Milk (cow, pasteurized)' 'Beans' 'Bread' 'Oil (vegetable)'
 'Fuel (diesel)' 'Fuel (kerosene)' 'Fuel (petrol-gasoline)' 'Maize flour'
 'Rice' 'Wheat flour' 'Milk (UHT)' 'Cooking fat' 'Bananas' 'Onions (red)'
 'Tomatoes' 'Potatoes (Irish, red)' 'Beans (kidney)' 'Beans (rosecoco)'
 'Beans (yellow)' 'Cabbage' 'Potatoes (Irish, white)' 'Kale'
 'Rice (aromatic)' 'Sorghum (red)' 'Cowpeas' 'Onions (dry)' 'Spinach'
 'Maize (white, dry)' 'Millet (finger)' 'Fish (omena, dry)'
 'Cowpea leaves' 'Sorghum (white)' 'Beans (dolichos)' 'Beans (mung)'
 'Rice (imported, Pakistan)' 'Salt' 'Sugar' 'Meat (beef)' 'Meat (goat)'
 'Milk (camel, fresh)' 'Milk (cow, fresh)' 'Meat (camel)']


In [89]:
rice_variations = ['Rice', 'Rice (aromatic)', 'Rice (imported, Pakistan)']
rice_data = food_prices[food_prices['commodity'].isin(rice_variations)]
print(rice_data.head)

<bound method NDFrame.head of              date         admin1   admin2                  market  \
4198   2020-08-15        Nairobi  Nairobi                 Nairobi   
4280   2021-01-15        Eastern    Kitui      Kitui town (Kitui)   
4314   2021-01-15  North Eastern  Garissa  Garissa town (Garissa)   
4321   2021-01-15  North Eastern  Mandera        Takaba (Mandera)   
4340   2021-01-15    Rift Valley   Nakuru       Wakulima (Nakuru)   
...           ...            ...      ...                     ...   
10597  2024-03-15    Rift Valley  Turkana       Ethiopia (Kakuma)   
10610  2024-03-15    Rift Valley  Turkana                Kakuma 2   
10617  2024-03-15    Rift Valley  Turkana                Kakuma 3   
10628  2024-03-15    Rift Valley  Turkana   Kalobeyei (Village 1)   
10633  2024-03-15    Rift Valley  Turkana   Kalobeyei (Village 2)   

               latitude         longitude            category  \
4198          -1.283333         36.816667  cereals and tubers   
4280       

In [90]:
rice_data.columns

Index(['date', 'admin1', 'admin2', 'market', 'latitude', 'longitude',
       'category', 'commodity', 'unit', 'priceflag', 'pricetype', 'currency',
       'price', 'usdprice'],
      dtype='object')

In [91]:
rice_data.shape

(452, 14)

In [92]:
rice_data.isnull().sum()

Unnamed: 0,0
date,0
admin1,0
admin2,0
market,0
latitude,0
longitude,0
category,0
commodity,0
unit,0
priceflag,0


In [93]:
columns_to_keep = ['date','latitude', 'longitude',
                       'commodity', 'unit', 'currency', 'price', 'usdprice']
columns_to_drop = [col for col in rice_data.columns if col not in columns_to_keep]
df_cleaned = rice_data.drop(columns=columns_to_drop, errors='ignore')

In [94]:
df_cleaned.head()

Unnamed: 0,date,latitude,longitude,commodity,unit,currency,price,usdprice
4198,2020-08-15,-1.283333,36.816667,Rice,KG,KES,104.0,0.9634
4280,2021-01-15,-1.35657,38.00825,Rice (aromatic),50 KG,KES,5625.0,51.1364
4314,2021-01-15,-0.459682,39.64136,Rice (aromatic),50 KG,KES,5750.0,52.2727
4321,2021-01-15,3.394616,40.225882,Rice (aromatic),50 KG,KES,6400.0,58.1818
4340,2021-01-15,-0.303099,36.080026,"Rice (imported, Pakistan)",50 KG,KES,4670.0,42.4545


In [95]:
df_cleaned.isnull().sum()

Unnamed: 0,0
date,0
latitude,0
longitude,0
commodity,0
unit,0
currency,0
price,0
usdprice,0


In [96]:
#Changing the words to numeric values
df_cleaned.replace({'commodity': {'Rice': 1, 'Rice (aromatic)': 2, 'Rice (imported, Pakistan)': 3}}, inplace=True)
df_cleaned.replace({'unit': {'50 KG': 50, 'KG': 0}}, inplace=True)



  df_cleaned.replace({'commodity': {'Rice': 1, 'Rice (aromatic)': 2, 'Rice (imported, Pakistan)': 3}}, inplace=True)
  df_cleaned.replace({'unit': {'50 KG': 50, 'KG': 0}}, inplace=True)


Splitting training and test data

In [114]:
df_cleaned['date'] = pd.to_datetime(df_cleaned['date']).apply(lambda x: x.toordinal())
df_cleaned.head()


Unnamed: 0,date,latitude,longitude,commodity,unit,currency,price,usdprice
4198,719163,-1.283333,36.816667,1,0,KES,104.0,0.9634
4280,719163,-1.35657,38.00825,2,50,KES,5625.0,51.1364
4314,719163,-0.459682,39.64136,2,50,KES,5750.0,52.2727
4321,719163,3.394616,40.225882,2,50,KES,6400.0,58.1818
4340,719163,-0.303099,36.080026,3,50,KES,4670.0,42.4545


In [116]:
X = df_cleaned.drop(['price','currency', 'usdprice'], axis=1)
Y = df_cleaned['price']

Splitting the data into training and test data

In [117]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=2)
X_train.head()

Unnamed: 0,date,latitude,longitude,commodity,unit
7292,719163,-1.28444,36.74361,1,0
4534,719163,3.394616,40.225882,2,50
9494,719163,3.768113,34.82177,1,0
6413,719163,-0.303099,36.080026,2,50
7497,719163,-4.039972,39.6933678,1,0


Model Training

In [118]:
linear_model = LinearRegression()
linear_model.fit(X_train, Y_train)

Model evaluation

In [123]:
test_data_prediction1 = linear_model.predict(X_test)
error_score = metrics.r2_score(Y_test, test_data_prediction1)
print( 'R score = ', error_score)

R score =  0.9316887289706366


Random Forest Regressor

In [122]:
regressor = RandomForestRegressor(n_estimators=100, random_state=0)
regressor.fit(X_train, Y_train)

In [124]:
regressor_prediction = regressor.predict(X_test)
error_score = metrics.r2_score(Y_test, regressor_prediction)
print( 'R score = ', error_score)

R score =  0.9559758094978271


Decison Tree regressor

In [125]:
decisonTree = DecisionTreeRegressor()
decisonTree.fit(X_train, Y_train)

In [126]:
tree_prediction = regressor.predict(X_test)
error_score = metrics.r2_score(Y_test, tree_prediction)
print( 'R score = ', error_score)

R score =  0.9559758094978271


Lasso regression

In [127]:
lasso_regressor = Lasso()
lasso_regressor.fit(X_train, Y_train)

In [128]:
lasso_regressor_prediction = regressor.predict(X_test)
error_score = metrics.r2_score(Y_test, lasso_regressor_prediction)
print( 'R score = ', error_score)

R score =  0.9559758094978271
