# Import the libraries and the dataset

In [None]:
import pandas as pd

In [None]:
# Read the India import records from the Kaggle input directory into a DataFrame.
import_csv_path = '/kaggle/input/india-trade-data/2018-2010_import.csv'
import_data = pd.read_csv(import_csv_path)

In [None]:
# Preview the first five rows to check which columns and values were loaded.
import_data.head(n=5)

In [None]:
# Dimensions of the loaded table as a (rows, columns) tuple.
import_data.shape

## Get the distinct commodities, country names and years

In [None]:
# Distinct commodity names found in the 'Commodity' column.
commodity_column = import_data['Commodity']
unique_commodities = commodity_column.unique()

In [None]:
# Number of distinct commodities.
len(unique_commodities)

In [None]:
# Distinct country names found in the 'country' column.
country_column = import_data['country']
unique_countries = country_column.unique()

In [None]:
# Number of distinct countries.
len(unique_countries)

In [None]:
# Distinct years found in the 'year' column.
year_column = import_data['year']
unique_year = year_column.unique()

In [None]:
# Number of distinct years covered by the data.
len(unique_year)

## Visualize the total import value for each year

In [None]:
# Sum of the 'value' column for each year, as a Series indexed by year.
value_by_year = import_data.groupby('year')['value']
yearly_data = value_by_year.sum()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Apply seaborn's default theme to the matplotlib figures drawn below.
sns.set()

In [None]:
# Shared title styling (size 20, bold) passed as `fontdict` to the plot titles.
font_dict = dict(size=20, weight='bold')

In [None]:
# Yearly import totals drawn twice: as bars (top) and as a trend line (bottom).
# `yearly_data` holds sums of the 'value' column; the per-country charts of the
# same column label it 'Import in US($)', so the same wording is used here
# instead of calling it a "quantity".
plt.figure(figsize=(10, 4))

# Top panel: one bar per year.
plt.subplot(211)
plt.title("Import Value-vs-Year", fontdict=font_dict)
plt.bar(x=yearly_data.index, height=yearly_data.values)
plt.ylabel('Import in US($)')

# Bottom panel: the same totals as a line; only this panel carries the x label.
plt.subplot(212)
plt.plot(yearly_data)
plt.xlabel('Year')
plt.ylabel('Import in US($)')
plt.show()

## Visualize the total import value of each country, on a yearly basis

In [None]:
# Sum of 'value' for every (country, year) pair, as a Series with a two-level index.
country_year_groups = import_data.groupby(['country', 'year'])
country_import = country_year_groups['value'].sum()

In [None]:
# Order the rows by their index in ascending order (sort_index's defaults).
country_import = country_import.sort_index()

In [None]:
# Display the per-country, per-year totals.
country_import

In [None]:
# Yearly totals for Afghanistan: select on the 'country' level, leaving 'year' as the index.
country_import.xs('AFGHANISTAN TIS', level='country')

In [None]:
# Pie chart of how Afghanistan's import total is split across the years.
afghanistan_by_year = country_import.loc['AFGHANISTAN TIS']
plt.figure(figsize=(10, 8))
plt.pie(
    afghanistan_by_year.values,
    labels=afghanistan_by_year.index,
    autopct="%.2f",  # print each wedge's percentage with two decimals
)
plt.title("Trade Analysis ", fontdict=font_dict)

In [None]:
# Peek at the first 20 distinct commodity names.
commodity_names = import_data['Commodity'].unique()
commodity_names[:20]

## Some of the questions that need to be answered

1. What did India import the most in 2016 and in 2018?
2. Which commodity forms a major chunk of trade?
3. How has the trade between India and any given country grown over time? (Countries - USA, China, IRAN, Japan, Russia, United Arab Emirates, UK, Afghanistan, Pakistan)

### 1. What did India import the most in 2016 and in 2018?

In [None]:
# Sum of 'value' per commodity, restricted to the rows whose year is 2016.
rows_2016 = import_data.loc[import_data['year'] == 2016]
commodities_2016_sum = rows_2016.groupby('Commodity')['value'].sum()

In [None]:
# Commodity with the largest 2016 total, returned as a one-element Index.
ranked_2016 = commodities_2016_sum.sort_values(ascending=False)
ranked_2016.index[:1]

In [None]:
# Sum of 'value' per commodity, restricted to the rows whose year is 2018.
rows_2018 = import_data.loc[import_data['year'] == 2018]
commodities_2018_sum = rows_2018.groupby('Commodity')['value'].sum()

In [None]:
# Commodity with the largest 2018 total, returned as a one-element Index.
ranked_2018 = commodities_2018_sum.sort_values(ascending=False)
ranked_2018.index[:1]

### Answer - Most Imported Item

1. 2016 - NATURAL OR CULTURED PEARLS.. (Jewellery Item)
2. 2018 - MINERAL FUELS.. (Natural Resource)

### 2. Which commodity forms a major chunk of trade?

In [None]:
# Sum of 'value' per commodity over all years, largest first.
commodity_totals = import_data.groupby('Commodity')['value'].sum()
commodities_sum = commodity_totals.sort_values(ascending=False)

In [None]:
# Keep only the first ten rows, i.e. the ten largest commodities of the sorted Series.
commodities_sum = commodities_sum.iloc[:10]

In [None]:
# Shorten each commodity name to its first 15 characters so it is easier to visualize.
commodities_sum.index = commodities_sum.index.str[:15]

In [None]:
# Pie chart of the commodities kept in `commodities_sum`. The printed percentages
# are shares of the plotted total only, since a pie is normalised over its inputs.
wedge_labels = commodities_sum.index
plt.figure(figsize=(10, 8))
plt.pie(commodities_sum, labels=wedge_labels, autopct="%.2f")
plt.title('Resource-vs-Import Share Percent', fontdict=font_dict)

### Answer - Commodities forming the major chunk of trade

1. Mineral Fuels.
2. Natural or Cultured Pearls.

### 3. How has the trade between India and any given country grown over time? (Countries - USA, China, IRAN, Japan, Russia, United Arab Emirates, UK, Afghanistan, Pakistan)

In [None]:
# List the country names beginning with 'U' to find how the dataset spells USA, UAE and UK.
starts_with_u = import_data['country'].str.startswith('U')
import_data.loc[starts_with_u, 'country'].unique()

In [None]:
# List the country names beginning with 'C' to find how the dataset spells China.
starts_with_c = import_data['country'].str.startswith('C')
import_data.loc[starts_with_c, 'country'].unique()

In [None]:
# List the country names beginning with 'I' to find how the dataset spells Iran.
starts_with_i = import_data['country'].str.startswith('I')
import_data.loc[starts_with_i, 'country'].unique()

In [None]:
# List the country names beginning with 'J' to find how the dataset spells Japan.
starts_with_j = import_data['country'].str.startswith('J')
import_data.loc[starts_with_j, 'country'].unique()

In [None]:
# List the country names beginning with 'R' to find how the dataset spells Russia.
starts_with_r = import_data['country'].str.startswith('R')
import_data.loc[starts_with_r, 'country'].unique()

In [None]:
# List the country names beginning with 'A' to find how the dataset spells Afghanistan.
starts_with_a = import_data['country'].str.startswith('A')
import_data.loc[starts_with_a, 'country'].unique()

In [None]:
# List the country names beginning with 'P' to find how the dataset spells Pakistan.
starts_with_p = import_data['country'].str.startswith('P')
import_data.loc[starts_with_p, 'country'].unique()

In [None]:
# Dataset spellings of the countries whose trade with India is examined below.
countries_list = [
    'U S A',
    'U ARAB EMTS',
    'U K',
    'CHINA P RP',
    'IRAN',
    'JAPAN',
    'RUSSIA',
    'AFGHANISTAN TIS',
    'PAKISTAN IR',
]

In [None]:
# Sum of 'value' per (country, year), restricted to the countries in `countries_list`.
in_selection = import_data['country'].isin(countries_list)
selected_groups = import_data[in_selection].groupby(['country', 'year'])
specific_countries_data = selected_groups['value'].sum()

In [None]:
# Display the yearly totals of the selected countries.
specific_countries_data

#### Which country accounts for the maximum trade among the specified countries?

In [None]:
# Sum of 'value' per selected country across all years, largest first.
selected_rows = import_data[import_data['country'].isin(countries_list)]
totals_by_country = selected_rows.groupby('country')['value'].sum()
countries_share = totals_by_country.sort_values(ascending=False)

In [None]:
# Convert each country's total into its percentage of the combined total.
# The grand total is computed once instead of once per element, and the result
# keeps two decimals: rounding to whole percents collapses small shares to 0
# and distorts any chart drawn from this Series.
combined_total = countries_share.sum()
countries_share = (100 * countries_share / combined_total).round(2)

In [None]:
# Display the percentage share of each selected country.
countries_share

In [None]:
# Pie chart of each selected country's share, labelled with the country names.
share_labels = countries_share.index
plt.figure(figsize=(12, 8))
plt.pie(countries_share, labels=share_labels, autopct="%.2f")
plt.title('Countries Percentage Share', fontdict=font_dict)

### How has the trade grown over time for China?

In [None]:
# Imports from China per year: bar chart on top, trend line underneath.
china_by_year = specific_countries_data.loc['CHINA P RP']
plt.figure(figsize=(10, 8))

# Top panel: one bar per year, carrying the figure title.
plt.subplot(2, 1, 1)
plt.bar(x=china_by_year.index, height=china_by_year.values)
plt.title('Import-vs-Year for China', fontdict=font_dict)

# Bottom panel: the same yearly totals as a line, with the axis labels.
plt.subplot(2, 1, 2)
plt.plot(china_by_year)
plt.xlabel('Year')
plt.ylabel('Import in US($)')

### Visualizing trade statistics regarding specified countries

In [None]:
# Draw one figure per selected country: yearly bars on top, trend line below.
for country in countries_list:
    # Look the country's yearly totals up once and reuse them for both panels.
    yearly_totals = specific_countries_data.loc[country]
    plt.figure(figsize=(10, 8))

    plt.subplot(2, 1, 1)
    plt.bar(x=yearly_totals.index, height=yearly_totals.values)
    plt.title(f'Import-vs-Year for {country}', fontdict=font_dict)

    plt.subplot(2, 1, 2)
    plt.plot(yearly_totals)
    plt.xlabel('Year')
    plt.ylabel('Import in US($)')