# Livestock Production

In [1]:
import pandas as pd
import numpy as np

In [2]:
# read dataframe

path = 'livestock.csv'
df = pd.read_csv(path)
df.head()

Unnamed: 0,AreaName,Year,Stock_largeAni,Stock_cattle,Stock_horse,Stock_donkey,Stock_mule,Stock_camel,Out_hog,Stock_hog,...,MeatYeild_gt_shp,MilkYeild,MilkYeild_cattle,WoolYeild_sheep,WoolYeild_sheep_fine,WoolYeild_sheep_semi,WoolYeild_goat,WoolYeild_goat_cashmere,EggYeild,HoneyYeild
0,China,1996,13360.6,11031.8,871.5,944.4,478.0,34.9,41225.1,36283.6,...,181.0,735.8,629.4,298102.0,121020.0,74099.0,35255.0,9585.0,1965.2,18.4
1,China,1997,14541.8,12175.7,891.2,952.8,480.6,35.0,46483.7,40034.8,...,212.8,681.1,601.1,255059.0,116054.0,55683.0,25865.0,8626.0,1895.3,21.1
2,China,1998,14803.2,12441.9,898.1,955.8,473.9,33.5,50215.1,42256.3,...,234.6,745.4,662.9,277545.0,115752.0,68775.0,31417.0,9799.0,2021.3,20.7
3,China,1999,15024.75,12698.34,891.41,934.77,467.27,32.96,51977.2,43144.2,...,251.2643,806.9073,717.5939,283152.0,114103.0,73700.0,31849.0,10179.65,2134.667,22.9907
4,China,2000,15151.51,12866.34,876.58,922.73,453.03,32.62,52673.34,44681.54,...,273.958296,919.118908,827.43079,292502.0,117386.0,84921.0,33266.0,11057.0,2243.3,24.6


In [3]:
df.columns.values

array(['AreaName', 'Year', 'Stock_largeAni', 'Stock_cattle',
       'Stock_horse', 'Stock_donkey', 'Stock_mule', 'Stock_camel',
       'Out_hog', 'Stock_hog', 'Stock_gt_shp', 'Stock_goat',
       'Stock_sheep', 'Out_cattle', 'Out_gt_shp', 'Out_poultry',
       'MeatYeild', 'MeatYeild_hg_ct_gt_shp', 'MeatYeild_hog',
       'MeatYeild_cattle', 'MeatYeild_gt_shp', 'MilkYeild',
       'MilkYeild_cattle', 'WoolYeild_sheep', 'WoolYeild_sheep_fine',
       'WoolYeild_sheep_semi', 'WoolYeild_goat',
       'WoolYeild_goat_cashmere', 'EggYeild', 'HoneyYeild'], dtype=object)

In [4]:
df['AreaName'].unique()

array(['China', 'Beijing', 'Tianjin', 'Hebei', 'Shanxi', 'Inner Mongolia',
       'Liaoning', 'Jilin', 'Heilongjiang', 'Shanghai', 'Jiangsu',
       'Zhejiang', 'Anhui', 'Fujian', 'Jiangxi', 'Shandong', 'Henan',
       'Hubei', 'Hunan', 'Guangdong', 'Guangxi', 'Hainan', 'Chongqing',
       'Sichuan', 'Guizhou', 'Yunnan', 'Tibet', 'Shaanxi', 'Gansu',
       'Qinghai', 'Ningxia', 'Xinjiang'], dtype=object)

In [5]:
df['Year'].unique()

array([1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
       2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017],
      dtype=int64)

In [6]:
# mask missing data with 'True'

df_null = df.isnull()
df_null.head()

Unnamed: 0,AreaName,Year,Stock_largeAni,Stock_cattle,Stock_horse,Stock_donkey,Stock_mule,Stock_camel,Out_hog,Stock_hog,...,MeatYeild_gt_shp,MilkYeild,MilkYeild_cattle,WoolYeild_sheep,WoolYeild_sheep_fine,WoolYeild_sheep_semi,WoolYeild_goat,WoolYeild_goat_cashmere,EggYeild,HoneyYeild
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [7]:
# find the columns where missing values exist
null_counts = {}
for column in df.columns.tolist():    
    null_count = df_null.groupby([column]).size()[False]
    if null_count < len(df.index.values): null_counts[column] = null_count
null_counts

{'Stock_horse': 561,
 'Stock_donkey': 529,
 'Stock_mule': 529,
 'Stock_camel': 158,
 'Stock_sheep': 544,
 'Out_cattle': 558,
 'Out_gt_shp': 558,
 'Out_poultry': 558,
 'MeatYeild_cattle': 633,
 'WoolYeild_sheep': 540,
 'WoolYeild_sheep_fine': 484,
 'WoolYeild_sheep_semi': 504,
 'WoolYeild_goat': 559,
 'WoolYeild_goat_cashmere': 426,
 'HoneyYeild': 601}

1. The missing valuse in 'Out_cattle', 'Out_gt_shp', 'Out_poultry' because of the lackinng of statistics in the year 1997 and 2017.
2. The valuse in other columns are missing because they are zeros.

In [8]:
# replace missing valuse in by 0s.

columns = ['Stock_horse','Stock_donkey','Stock_mule','Stock_camel','Stock_sheep','MeatYeild_cattle',\
'WoolYeild_sheep','WoolYeild_sheep_fine','WoolYeild_sheep_semi','WoolYeild_goat','WoolYeild_goat_cashmere',\
'HoneyYeild']

df[columns] = df[columns].fillna(0)
df.describe(include='all')

Unnamed: 0,AreaName,Year,Stock_largeAni,Stock_cattle,Stock_horse,Stock_donkey,Stock_mule,Stock_camel,Out_hog,Stock_hog,...,MeatYeild_gt_shp,MilkYeild,MilkYeild_cattle,WoolYeild_sheep,WoolYeild_sheep_fine,WoolYeild_sheep_semi,WoolYeild_goat,WoolYeild_goat_cashmere,EggYeild,HoneyYeild
count,642,642.0,642.0,642.0,642.0,642.0,642.0,642.0,642.0,642.0,...,642.0,642.0,642.0,642.0,642.0,642.0,642.0,642.0,642.0,642.0
unique,32,,,,,,,,,,...,,,,,,,,,,
top,China,,,,,,,,,,...,,,,,,,,,,
freq,22,,,,,,,,,,...,,,,,,,,,,
mean,,2007.465732,848.01159,737.702808,44.209491,44.440794,19.939555,1.876654,4059.385176,2972.681655,...,24.01433,168.170416,160.284607,23574.031009,8111.313219,7217.599774,2410.425487,994.260982,172.277924,2.324778
std,,5.794417,2262.837114,1961.245434,123.495124,128.605836,60.566196,5.960201,11024.658981,8016.22297,...,66.308076,480.472562,460.266432,66989.492238,23963.64205,20118.954097,6589.1018,2916.48464,472.688429,6.725113
min,,1996.0,1.23,1.23,0.0,0.0,0.0,0.0,12.6,21.3,...,0.226204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0
25%,,2002.0,123.272883,111.035,0.224125,0.1925,0.160025,0.0,563.8125,440.447977,...,2.604375,13.4125,12.945695,14.1325,0.0,3.66775,10.0,0.0,16.2,0.2
50%,,2007.0,454.94,383.81,4.48275,6.965,2.405,0.0,1755.2324,1339.595857,...,7.157926,32.70555,30.14,3316.0,336.065,1148.045,405.5,34.7,41.19205,0.638234
75%,,2012.0,627.283775,541.169372,40.23,32.25,13.425,0.0,3180.3775,2302.897361,...,15.63,92.162369,83.4448,14979.3475,3565.5,5200.75,1708.75,769.5514,119.9495,1.27311


In [9]:
# df_cn = df[df['AreaName'] == 'China']
# df_cn.head()

In [16]:
# df_provinces = df[(df['AreaName'] == 'Beijing')|
#                   (df['AreaName'] == 'Tianjin')|
#                   (df['AreaName'] == 'Hebei')|
#                   (df['AreaName'] == 'Shanxi')|
#                   (df['AreaName'] == 'Inner Mongolia')|
#                   (df['AreaName'] == 'Liaoning')|
#                   (df['AreaName'] == 'Jilin')|
#                   (df['AreaName'] == 'Heilongjiang')|
#                   (df['AreaName'] == 'Shanghai')|
#                   (df['AreaName'] == 'Jiangsu')|
#                   (df['AreaName'] == 'Zhejiang')|
#                   (df['AreaName'] == 'Anhui')|
#                   (df['AreaName'] == 'Fujian')|
#                   (df['AreaName'] == 'Jiangxi')|
#                   (df['AreaName'] == 'Shandong')|
#                   (df['AreaName'] == 'Henan')|
#                   (df['AreaName'] == 'Hubei')|
#                   (df['AreaName'] == 'Hunan')|
#                   (df['AreaName'] == 'Guangdong')|
#                   (df['AreaName'] == 'Guangxi')|
#                   (df['AreaName'] == 'Hainan')|
#                   (df['AreaName'] == 'Chongqing')|
#                   (df['AreaName'] == 'Sichuan')|
#                   (df['AreaName'] == 'Guizhou')|
#                   (df['AreaName'] == 'Yunnan')|
#                   (df['AreaName'] == 'Tibet')|
#                   (df['AreaName'] == 'Shaanxi')|
#                   (df['AreaName'] == 'Gansu')|
#                   (df['AreaName'] == 'Qinghai')|
#                   (df['AreaName'] == 'Ningxia')|
#                   (df['AreaName'] == 'Xinjiang')]
# df_provinces.head()

In [20]:
df['Out_cattle_eS'],df['Out_cattle_eM'],df['Out_gt_shp_eS'],df['Out_gt_shp_eM'],df['Out_poultry_eS'],df['Out_poultry_eM'] = [np.nan,np.nan,np.nan,np.nan,np.nan,np.nan]

In [35]:
Stock_cattle_1999 = df['Stock_cattle'][(df['Year']==1999)].values

In [38]:
df['Out_cattle'][(df['Year']==1998)].

AttributeError: 'Series' object has no attribute 'fill'

In [8]:
#df['Year'] = df['Year'].astype(str)
#df['Year'].unique()

In [11]:
# # set index
# df.set_index(['AreaName', 'Year'], inplace=True)
# df.head()

In [13]:
# data disctiption
# print(df.dtypes)
# print(df.columns.tolist)

In [1]:
# cities = ['Beijing','Tianjin','Hebei','Shanxi','Inner Mongolia',\
#           'Liaoning','Jilin','Heilongjiang',\
#           'Shanghai','Jiangsu','Zhejiang','Anhui','Fujian','Jiangxi','Shandong',\
#           'Henan','Hubei','Hunan','Guangdong','Guangxi','Hainan',\
#           'Chongqing','Sichuan','Guizhou','Yunnan','Tibet',\
#           'Shaanxi','Gansu','Qinghai','Ningxia','Xinjiang']

# years = range(1997, 2017)

# stocks = ['Stock_cattle', 'Stock_horse', 'Stock_donkey','Stock_mule', 'Stock_camel', \
#           'Stock_hog', 'Stock_goat', 'Stock_sheep']

# outputs = ['Out_hog', 'Out_cattle', 'Out_gt_shp', 'Out_poultry']

# yeilds = ['MeatYeild_hog', 'MeatYeild_cattle', 'MeatYeild_gt_shp', \
#           'MilkYeild_cattle', 'EggYeild', \
#           'WoolYeild_sheep', 'WoolYeild_sheep_fine', 'WoolYeild_sheep_semi', \
#           'WoolYeild_goat', 'WoolYeild_goat_cashmere']