# Agriculture Commodities, Prices & Seasons 

In [22]:
import pandas as pd
import numpy as np

### Load and view data

In [36]:
# Path of the monthly market (CMO) records, then the DataFrame read from it.
monthly_cmo_data = "Monthly_data_cmo.csv"
monthly_cmo_df = pd.read_csv(filepath_or_buffer=monthly_cmo_data)

In [37]:
# List the column labels of the monthly CMO frame loaded above.
# (The frame is named `monthly_cmo_df`; `monthly_data_df` is never defined
# and would raise NameError on a fresh kernel.)
monthly_cmo_df.columns

Index(['APMC', 'Commodity', 'Year', 'Month', 'arrivals_in_qtl', 'min_price',
       'max_price', 'modal_price', 'date', 'district_name', 'state_name'],
      dtype='object')

Variable description:
* msprice- Minimum Support Price
* arrivals_in_qtl- Quantity arrival in market (in quintal)
* min_price- Minimum price charged per quintal
* max_price- Maximum price charged per quintal
* modal_price- Modal (most frequently occurring) price charged per quintal


In [38]:
# Preview the first five rows of the monthly CMO data.
monthly_cmo_df.head(n=5)

Unnamed: 0,APMC,Commodity,Year,Month,arrivals_in_qtl,min_price,max_price,modal_price,date,district_name,state_name
0,Ahmednagar,Bajri,2015,April,79,1406,1538,1463,2015-04,Ahmadnagar,Maharashtra
1,Ahmednagar,Bajri,2016,April,106,1788,1925,1875,2016-04,Ahmadnagar,Maharashtra
2,Ahmednagar,Wheat(Husked),2015,April,1253,1572,1890,1731,2015-04,Ahmadnagar,Maharashtra
3,Ahmednagar,Wheat(Husked),2016,April,387,1750,2220,1999,2016-04,Ahmadnagar,Maharashtra
4,Ahmednagar,Sorgum(Jawar),2015,April,3825,1600,2200,1900,2015-04,Ahmadnagar,Maharashtra


In [39]:
# Preview the last five rows of the monthly CMO data.
monthly_cmo_df.tail(n=5)

Unnamed: 0,APMC,Commodity,Year,Month,arrivals_in_qtl,min_price,max_price,modal_price,date,district_name,state_name
62424,Shrigonda,GRAM,2016,November,586,5700,6367,6200,2016-11,Ahmadnagar,Maharashtra
62425,Shrigonda,GREEN GRAM,2016,November,2,5000,5000,5000,2016-11,Ahmadnagar,Maharashtra
62426,Shrigonda,BLACK GRAM,2016,November,46,4700,6933,6400,2016-11,Ahmadnagar,Maharashtra
62427,Shrigonda,SOYBEAN,2016,November,166,2583,2708,2633,2016-11,Ahmadnagar,Maharashtra
62428,Shrigonda,SUNFLOWER,2016,November,74,2933,3200,3067,2016-11,Ahmadnagar,Maharashtra


At first glance, the data doesn't seem to be organised by date.

Let's check whether the data has missing values, and the type of each column.

In [40]:
# Print per-column non-null counts and dtypes to check for missing values.
monthly_cmo_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62429 entries, 0 to 62428
Data columns (total 11 columns):
APMC               62429 non-null object
Commodity          62429 non-null object
Year               62429 non-null int64
Month              62429 non-null object
arrivals_in_qtl    62429 non-null int64
min_price          62429 non-null int64
max_price          62429 non-null int64
modal_price        62429 non-null int64
date               62429 non-null object
district_name      62429 non-null object
state_name         62429 non-null object
dtypes: int64(5), object(6)
memory usage: 5.2+ MB


Basic statistical information about the data

In [41]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
monthly_cmo_df.describe()

Unnamed: 0,Year,arrivals_in_qtl,min_price,max_price,modal_price
count,62429.0,62429.0,62429.0,62429.0,62429.0
mean,2015.337503,6043.088,2945.228,3688.814,3296.003989
std,0.690451,34703.31,13183.96,7662.962,3607.792534
min,2014.0,1.0,0.0,0.0,0.0
25%,2015.0,38.0,1250.0,1600.0,1450.0
50%,2015.0,211.0,1976.0,2797.0,2425.0
75%,2016.0,1364.0,3900.0,4647.0,4257.0
max,2016.0,1450254.0,3153038.0,1600090.0,142344.0


Let's dig a little deeper into the data

In [50]:
# Distinct years present in the monthly CMO data.
monthly_cmo_df["Year"].unique()

array([2015, 2016, 2014])

In [54]:
# Number of records per APMC (market), most frequent first.
monthly_cmo_df["APMC"].value_counts()

Mumbai                    1538
Pune                      1513
Nagpur                    1340
Barshi                    1076
Jalgaon                   1055
Solapur                    984
Kamthi                     967
Shrirampur                 947
Kalyan                     932
Pune-Manjri                930
Aurangabad                 918
Rahata                     848
Karad                      831
Amarawati                  817
Kolhapur                   816
Nashik                     750
Pune-Pimpri                681
Ahmednagar                 649
Ramtek                     621
Satara                     549
Akluj                      542
Osmanabad                  510
Sangali                    462
Ratanagari                 457
Manchar                    455
Chandrapur-Ganjwad         454
Dhule                      434
Kalmeshwar                 415
Vadgaon Peth               415
Kalamb (Os)                407
                          ... 
Hingoli-Kanegaon Naka       13
Umrane  

In [None]:
# Number of records per commodity, most frequent first.
# NOTE(review): this cell originally held an unfinished expression
# (`monthly_cmo_df.`), which is a SyntaxError; confirm this is the intended
# exploration step.
monthly_cmo_df["Commodity"].value_counts()

Let's also load the other `csv` file and view it

In [29]:
# Path of the minimum-support-price (MSP) file, then the DataFrame read from it.
cmo_msp = 'CMO_MSP_Mandi.csv'
cmo_msp_df = pd.read_csv(filepath_or_buffer=cmo_msp)

In [31]:
# List the column labels of the MSP frame.
cmo_msp_df.columns

Index(['commodity', 'year', 'Type', 'msprice', 'msp_filter'], dtype='object')

In [32]:
# Preview the first five rows of the MSP data.
cmo_msp_df.head(n=5)

Unnamed: 0,commodity,year,Type,msprice,msp_filter
0,PADDY-UNHUSKED,2012,Kharif Crops,1250.0,1
1,RICE(PADDY-HUS),2012,Kharif Crops,1280.0,1
2,Jowar_Hybrid,2012,Kharif Crops,1500.0,1
3,SORGUM(JAWAR),2012,Kharif Crops,1520.0,1
4,BAJRI,2012,Kharif Crops,1175.0,1


In [55]:
# Print per-column non-null counts and dtypes to check for missing values.
cmo_msp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155 entries, 0 to 154
Data columns (total 5 columns):
commodity     155 non-null object
year          155 non-null int64
Type          155 non-null object
msprice       145 non-null float64
msp_filter    155 non-null int64
dtypes: float64(1), int64(2), object(2)
memory usage: 6.1+ KB


Here we can see that `cmo_msp_df` has missing values: `msprice` has 10 values missing.

In [56]:
# Distinct values of each MSP column, collected into one tuple in the order
# year, Type, commodity, msp_filter.
tuple(
    cmo_msp_df[column].unique()
    for column in ("year", "Type", "commodity", "msp_filter")
)

(array([2012, 2013, 2014, 2015, 2016]),
 array(['Kharif Crops', 'Rabi Crops', 'Other Crops'], dtype=object),
 array(['PADDY-UNHUSKED', 'RICE(PADDY-HUS)', 'Jowar_Hybrid',
        'SORGUM(JAWAR)', 'BAJRI', 'MAIZE', 'Ragi_Maldandi',
        'PIGEON PEA (TUR)', 'SPILT GERRN GRAM', 'SPLIT BLACK GRAM',
        'COTTON', 'Cotton_Long Staple', 'GR.NUT KERNELS', 'SUNFLOWER',
        'Soyabean_Black', 'SOYABEAN', 'SESAMUM', 'NIGER-SEED',
        'WHEAT(HUSKED)', 'WHEAT(UNHUSKED)', 'BARLI', 'Gram_Yellow',
        'Masur_Yellow', 'MUSTARD', 'SAFFLOWER', 'Toria_Yellow',
        'Copra (Milling)_Yellow', 'Copra (Ball)_Yellow', 'COCONUT',
        'Jute_Yellow', 'SUGAR-CANE', 'SUGARCANE'], dtype=object),
 array([1]))