In [None]:
# Setup
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
init_notebook_mode(connected=True)
cf.go_offline()
%matplotlib inline

In [None]:
# Relative location of the onion-price CSV read by this notebook
path = '../input/market-price-of-onion-2020/Onion Prices 2020.csv'

# Load the CSV into the notebook-wide frame `Data`;
# the trailing expression renders its first rows as the cell output.
Data = pd.read_csv(filepath_or_buffer=path)
Data.head()

In [None]:
# Extracting Date & Month from arrival_date
# The string is split on '/' once with the vectorised `.str` accessor instead of
# two separate `apply(lambda ...)` passes. Piece 0 becomes 'Date' and piece 1
# becomes 'Month'; both stay strings, exactly as before.
# NOTE(review): presumably arrival_date is day/month/year - confirm against the CSV.
arrival_parts = Data['arrival_date'].str.split('/')
Data['Month'] = arrival_parts.str[1]
Data['Date'] = arrival_parts.str[0]

In [None]:
# Preview the first rows again; the 'Month' and 'Date' columns added above should now be present.
Data.head()

Before we go in depth,
- let's check the row & column counts and also the null values.
- also the number of 'States, Districts, Markets, Commodities & Varieties' mentioned in the data.

In [None]:
# Report the size of the frame together with the total number of missing cells.
row_count, column_count = Data.shape
null_count = Data.isna().sum().sum()
print('Data Info :\n\tRow Count - {r:}\n\tColumn Count - {c:}\n\tNull Values - {n:}'.format(
    r=row_count, c=column_count, n=null_count))

# Report how many distinct values each categorical column holds.
print('\nValue Count :')
for column in ('state', 'district', 'market', 'commodity', 'variety'):
    distinct = Data[column].nunique()
    print("\t'{c:}s' mentioned : {n:}".format(c=column.title(), n=distinct))

# ***Min, Max, Modal Prices*** 
- We will have some look at **distribution plots** for onion prices.
- **Conclusion :**
    - a. There was a situation when the ***onion price was Zero. Yes 0.*** And it rose ***up to 18000 ₹/Quintal minimum price.*** This would be due to the ***'demand supply gap'***. We will explore this section further, taking arrival dates into account. 
    - b. The prices are quite ***stable around 2000₹/Quintal***, i.e. most of the time the market price was 2000₹. We will also look at this time period. 
    - c. All three price categories will be highly correlated with each other, which is quite obvious. 

In [None]:
# Min, Max & Modal Price Distribution
# Draws one kernel-density curve per price column on a shared axis.
# `sns.distplot` is deprecated; with hist=False it only ever drew the KDE curve
# (so `bins` had no effect), which is exactly what `sns.kdeplot` produces.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(12, 5))

price_curves = [
    ('min_price', 'green', 'Minimum Price'),
    ('max_price', 'red', 'Maximum Price'),
    ('modal_price', 'orange', 'Modal Price'),
]
for column, color, label in price_curves:
    sns.kdeplot(data=Data, x=column, color=color, label=label, ax=ax)

ax.set_title('Various Price Distributions')
ax.set_xlabel('Price')
ax.legend()      # the curve labels are only visible once a legend is drawn
sns.despine()

In [None]:
# Rows whose minimum price is exactly zero: which states and varieties do they cover?
zero_price_rows = Data[Data['min_price'] == 0]
print('Zero ₹ Onion Prices :\n\tState Count : {c:}\n\tState List : {s:}\n\tVariety List : {v:}\n'.format(
    c=zero_price_rows['state'].nunique(),
    s=list(zero_price_rows['state'].unique()),
    v=list(zero_price_rows['variety'].unique())))


# Same summary for rows whose minimum price is exactly 18000₹/Quintal.
peak_price_rows = Data[Data['min_price'] == 18000]
print('18000₹/Q Onion Price :\n\tState Count : {c:}\n\tState List : {s:}\n\tVariety List : {v:}'.format(
    c=peak_price_rows['state'].nunique(),
    s=list(peak_price_rows['state'].unique()),
    v=list(peak_price_rows['variety'].unique())))

# ***States, Month & Prices***
- We will be analysing the ***Onion Prices Situation*** for every month in **each State Separately**


- **Conclusion :**
    - a. The Onion prices were **extremely high in January 2020** for every state. Highest Recorded Prices were in **Kerala & Nagaland** with **+7000 ₹/Quintal**. The **main reason** for rise in price was **damage to the onion crop due to rains.**
    - b. Then came the saviour **Rabi Season Onion crops (70% of total crops produced in a year) in month of March** in markets all over India and thus prices started to fall back to normal and were **lowest in month of May & June** for almost every State.
    - c. Finally came ***Monsoon***. *Monsoon Rain damaged* the stored **Rabi crops** that support market demand till August-October & also **Early Kharif crops (20% of total crops produced in a year)** in farms in most of the Indian States causing a **huge Demand-Supply Gap** in every market and thus the prices have again started to rise.
    - d. ***Most Important Conclusion*** : The **Major onion producing States** like Maharashtra, Madhya Pradesh, Karnataka, Gujarat, Rajasthan & few others will **always suffer less** than other **States where onion is not suitable** to grow for many reasons like Nagaland, Kerala, Tripura, Himachal Pradesh, Odisha & few more **when Onion production decreases.** 
       
    
- **Source** :
    - 01. https://theprint.in/india/onion-prices-surge-to-rs-165-per-kg-govt-promises-imports-by-january-2020/331628/#:~:text=The%20main%20reason%20for%20rise,Maharasthra%2C%20the%20key%20growing%20state.&text=To%20boost%20supply%20and%20contain%20price%20rise%2C%20the%20government%20has,expected%20to%20arrive%20mid%2DJanuary.
    - 02. https://theprint.in/opinion/how-india-can-ensure-onions-are-all-through-year-at-good-price/334477/#:~:text=There%20are%20three%20sowing%20seasons,harvested%20in%20March%2DMay).
    - 03. https://theprint.in/india/onion-prices-could-touch-rs-100-kg-by-oct-as-heavy-rain-damaged-early-kharif-crop-rabi-stock/499990/#:~:text=New%20crop%20expected%20in%20November&text=Singh%20said%20retail%20prices%20may,to%20arrive%20only%20in%20November%E2%80%9D.&text=The%20rise%20in%20prices%20is,the%20major%20onion%20producing%20regions.

In [None]:
# Bar Plot - one panel per state, laid out on a two-column grid
# X-axis : Months
# Y-axis : Price (mean of every numeric column for that state and month)

sns.set_style("darkgrid")

states_list = Data['state'].unique()     # List of all States, in order of first appearance

# Average once for every (state, month) pair instead of re-grouping the whole
# frame inside the loop. numeric_only=True skips the string columns, which
# would otherwise raise a TypeError on pandas >= 2.
monthly_means = Data.groupby(['state', 'Month']).mean(numeric_only=True)

# Size the grid from the data rather than hard-coding 11 rows.
n_cols = 2
n_rows = max(1, (len(states_list) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(20, 60), sharey=True, squeeze=False)
fig.suptitle("Average Onion Price in States for each month of Year 2020", fontsize=24)

# Flattening row-major fills left panel, right panel, then the next row.
panels = axes.ravel()
for state, ax in zip(states_list, panels):
    # NOTE(review): 'Month' holds strings, so bars follow string order; this is
    # calendar order only if the months are zero-padded - confirm.
    # rot=0 keeps the month labels horizontal.
    state_fig = monthly_means.xs(state).plot(kind='bar', ax=ax, rot=0)
    state_fig.set_title(state, fontdict={'fontsize': 20, 'color' : 'red'})

# With an odd number of states the last panel has nothing to show.
for unused_ax in panels[len(states_list):]:
    unused_ax.set_visible(False)

# Lay the figure out once, after every panel is drawn, leaving room for the suptitle.
fig.tight_layout()
fig.subplots_adjust(top=0.96)

# ***States, Arrival Dates & Prices***
- We will be analysing the ***Onion Prices Situation*** on their **Arrival in Market** in **each State Separately**


- **Conclusion :**
    - a. **14 States out of 22** have seen huge **downfall in Minimum prices** for multiple times. This list includes **'Andhra Pradesh', 'Gujarat', 'Haryana', 'Karnataka', 'Kerala', 'Madhya Pradesh', 'Maharashtra', 'NCT of Delhi', 'Odisha','Punjab', 'Rajasthan', 'Telangana', 'Uttar Pradesh' & 'Uttrakhand'**. Minimum prices were as **low as 500₹/Quintal to Zero**.
    - b. **Note** that in the case of **Nagaland**, the data has constant values for Jan. & Feb.; that's the reason for the straight horizontal line. 

In [None]:
# Line Plot - one panel per state
# X-axis : Arrival Dates
# Y-axis : Price (mean of every numeric column for that state and arrival date)

sns.set_style("darkgrid")

states_list = Data['state'].unique()     # List of all States, in order of first appearance

# Sort and aggregate once instead of repeating both for every state.
# sort=False keeps the groups in the (Month, Date) order produced by the sort;
# numeric_only=True skips the string columns, which would otherwise raise a
# TypeError on pandas >= 2.
# NOTE(review): 'Month' and 'Date' are strings, so this order is chronological
# only if both are zero-padded - confirm against the CSV.
daily_means = (
    Data.sort_values(by=['Month', 'Date'])
        .groupby(['state', 'arrival_date'], sort=False)
        .mean(numeric_only=True)
)

# One row per state; sized from the data rather than hard-coding 22.
fig, axes = plt.subplots(nrows=max(1, len(states_list)), ncols=1, figsize=(20, 100), squeeze=False)
fig.suptitle("Average Onion Price in States on Arrival in Year 2020", fontsize=24)

for state, ax in zip(states_list, axes.ravel()):
    # line plot of this state's daily averages
    state_fig = daily_means.xs(state).reset_index().plot(
        kind='line', ax=ax, x='arrival_date',
        marker='o', markersize=3, markerfacecolor='black')
    state_fig.set_title(state, fontdict={'fontsize': 20, 'color' : 'red'})
    state_fig.tick_params(axis='x', labelrotation=0)   # keep the date labels horizontal

# Lay the figure out once, after every panel is drawn, leaving room for the suptitle.
fig.tight_layout()
fig.subplots_adjust(top=0.96)

In [None]:
# List every state that recorded a minimum price of 500₹/Quintal or less.
low_price_mask = Data['min_price'] <= 500
states = Data.loc[low_price_mask, 'state'].unique()

print('States with min. price <= 500₹')
for state in states:
    print('\t{s:}.'.format(s=state.title()))

# Variety, Prices & States
- We will explore the relation between **variety & prices** using a **box plot**
- Also the relation between **variety & states** using a **count plot**



- **Conclusion :**
    - a. **Small Onions** were a highly **expensive variety** of onions. Their average price is almost **5000₹/Quintal** & they arrived mostly in **Kerala Markets**.
    - b. Onion varieties like **'Local, Other, Onion, Nashik, White, 1st Sort, Pusa-Red, Bombay UP'** are the most preferred varieties in many states.
    - c. whereas varieties like **'Puna, Telagi, Big, 2nd Sort, Pole, Dry FAQ, Medium'** are preferred in just one state or more

In [None]:
# Box Plot : Variety of Onions wrt Prices   (left column)
# Count Plot : Variety of Onions wrt State   (right column)
# One grid row per onion variety.

sns.set_style("darkgrid")

variety_list = Data['variety'].unique()   # List of all onion variety, in order of first appearance
price_columns = ['min_price', 'max_price', 'modal_price']

# One row per variety; sized from the data rather than hard-coding 21.
# squeeze=False keeps `axes` two-dimensional, so each row unpacks into (left, right).
fig, axes = plt.subplots(nrows=max(1, len(variety_list)), ncols=2, figsize=(20, 100), squeeze=False)
fig.suptitle("Variety of Onions & relation wrt Prices & State", fontsize=24)

for variety, (box_ax, count_ax) in zip(variety_list, axes):
    # Filter once and reuse the rows for both panels.
    variety_rows = Data[Data['variety'] == variety]

    # box plot of the three price columns for this variety
    box_plot = sns.boxplot(data=variety_rows[price_columns], ax=box_ax)
    box_plot.set_title(variety, fontdict={'fontsize': 20, 'color' : 'red'})
    box_plot.tick_params(axis='x', labelrotation=0)

    # count plot of how many rows each state has for this variety.
    # The series is passed as `x=` explicitly: current seaborn treats the first
    # positional argument as `data`, not as the variable to count.
    count_plot = sns.countplot(x=variety_rows['state'], ax=count_ax)
    count_plot.set_title(variety, fontdict={'fontsize': 20, 'color' : 'red'})
    count_plot.tick_params(axis='x', labelrotation=25, labelsize=8)

# Lay the figure out once, after every panel is drawn, leaving room for the suptitle.
fig.tight_layout()
fig.subplots_adjust(top=0.96)

# Percentage Change in Avg. Onion Prices considering Arrival Dates

In [None]:
# Percentage change in prices between arrival days - one line-plot panel per state
# X-axis : Arrival Dates
# Y-axis : Fractional change of each averaged numeric column versus the previous arrival date

sns.set_style("darkgrid")

states_list = Data['state'].unique()     # List of all States, in order of first appearance

# Sort and aggregate once instead of repeating both for every state.
# sort=False keeps the groups in the (Month, Date) order produced by the sort;
# numeric_only=True skips the string columns, which would otherwise raise a
# TypeError on pandas >= 2.
# NOTE(review): 'Month' and 'Date' are strings, so this order is chronological
# only if both are zero-padded - confirm against the CSV.
daily_means = (
    Data.sort_values(by=['Month', 'Date'])
        .groupby(['state', 'arrival_date'], sort=False)
        .mean(numeric_only=True)
)

# One row per state; sized from the data rather than hard-coding 22.
fig, axes = plt.subplots(nrows=max(1, len(states_list)), ncols=1, figsize=(20, 100), squeeze=False)
fig.suptitle("Percentage Change in Average Onion Price in States considering Arrivals in Year 2020", fontsize=24)

for state, ax in zip(states_list, axes.ravel()):
    # pct_change is taken within this state's own series, so the first
    # arrival date of every state has no previous value and is NaN.
    state_changes = daily_means.xs(state).pct_change().reset_index()

    # line plot of the day-over-day change
    state_fig = state_changes.plot(
        kind='line', ax=ax, x='arrival_date',
        marker='o', markersize=3, markerfacecolor='black')
    state_fig.set_title(state, fontdict={'fontsize': 20, 'color' : 'red'})
    state_fig.tick_params(axis='x', labelrotation=0)   # keep the date labels horizontal

# Lay the figure out once, after every panel is drawn, leaving room for the suptitle.
fig.tight_layout()
fig.subplots_adjust(top=0.96)



- If you found this notebook to be helpful, a **like** would be appreciated.
- **Thank You :)**