In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Import the libraries

In [None]:
import warnings
# NOTE(review): called without a category, this suppresses every warning for
# the rest of the session, so deprecation notices from the libraries used
# below are hidden as well.
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import geopandas
import pycountry
import plotly.express as ex  # referred to as `ex` in the plotting cells

In [None]:
# Read the GDP table from the Kaggle input directory.
gdp_csv_path = "/kaggle/input/country-regional-and-world-gdp/gdp_csv.csv"
gdp = pd.read_csv(gdp_csv_path)

# Exploratory Data Analysis

In [None]:
# Preview the first rows of the GDP table.
gdp.head()

In [None]:
# Count the missing values in each column of the GDP table.
gdp.isna().sum()

There are no missing values.

In [None]:
# Number of distinct entries in the "Country Name" column (missing values, if any, count as one).
gdp["Country Name"].nunique(dropna=False)

We are going to visualize the 15 highest Gross Domestic Product totals in the world from 1960 to 2016.

In [None]:
# Total of the Value column per country code, summed over every year in the table.
Sum = gdp.groupby('Country Code')['Value'].sum()

# Keep the 15 LARGEST totals, biggest first. The previous ascending sort
# returned the 15 smallest totals, the opposite of what is wanted here.
# NOTE(review): at this point gdp still contains the regional and income-group
# aggregates that are only filtered out further down, so they can take part in
# this ranking — confirm whether they should be excluded first.
first_15 = Sum.sort_values(ascending=False).head(15)
first_15

In [None]:
# Largest per-code total of the Value column.
Sum.max()

In [None]:
# Bar chart of the 15 selected per-code totals.
# The earlier call passed xlim=10: an x-limit is meant to be a (left, right)
# pair, and the category axis of a bar chart needs no manual limit, so the
# argument is dropped. A title and axis labels are added so the figure can be
# read on its own.
ax = first_15.plot(kind='bar', color='red')
ax.set_title('Cumulative GDP by country code (15 selected entries)')
ax.set_xlabel('Country Code')
ax.set_ylabel('Sum of Value over all years')
plt.show()

In [None]:
# List every distinct entry of the "Country Name" column.
gdp["Country Name"].unique()

We can get the country names with pycountry:

In [None]:
#countries = []
#for x in pycountry.countries:
#    countries.append(x.name)

In [None]:
# Entries of the "Country Name" column that are excluded from the per-country
# analysis: rows carrying one of these names are filtered out of the GDP table
# before plotting and mapping.
countries = [
    'Arab World',
    'Caribbean small states',
    'Central Europe and the Baltics',
    'Early-demographic dividend',
    'East Asia & Pacific',
    'East Asia & Pacific (excluding high income)',
    'East Asia & Pacific (IDA & IBRD countries)',
    'Euro area',
    'Europe & Central Asia',
    'Europe & Central Asia (excluding high income)',
    'Europe & Central Asia (IDA & IBRD countries)',
    'European Union',
    'Fragile and conflict affected situations',
    'Heavily indebted poor countries (HIPC)',
    'High income',
    'IBRD only',
    'IDA & IBRD total',
    'IDA blend',
    'IDA only',
    'IDA total',
    'Late-demographic dividend',
    'Latin America & Caribbean',
    'Latin America & Caribbean (excluding high income)',
    'Latin America & the Caribbean (IDA & IBRD countries)',
    'Least developed countries: UN classification',
    'Low & middle income',
    'Low income',
    'Lower middle income',
    'Middle East & North Africa',
    'Middle East & North Africa (excluding high income)',
    'Middle East & North Africa (IDA & IBRD countries)',
    'Middle income',
    'North America',
    'OECD members',
    'Other small states',
    'Pacific island small states',
    'Post-demographic dividend',
    'Pre-demographic dividend',
    'Small states',
    'South Asia',
    'South Asia (IDA & IBRD)',
    'Sub-Saharan Africa',
    'Sub-Saharan Africa (excluding high income)',
    'Sub-Saharan Africa (IDA & IBRD countries)',
    'Upper middle income',
    'World',
]

In [None]:
# Show the list of names that will be excluded.
print(countries)

In [None]:
# Keep only the rows whose "Country Name" is not on the exclusion list.
is_excluded = gdp['Country Name'].isin(countries)
df_country = gdp.loc[~is_excluded]

We filtered out the entries that do not exist in the map dictionary.

In [None]:
# Number of distinct names left after filtering (missing values, if any, count as one).
df_country['Country Name'].nunique(dropna=False)

We are going to change some country names.
Note: there are still some other country names that need to be corrected.

In [None]:
# Rename countries whose spelling differs from the one used by the map data.
# Each key is replaced by its value wherever it appears in the frame.
map_name_by_gdp_name = {
    'United States': 'United States of America',
    'Russian Federation': 'Russia',
}
df_country = df_country.replace(to_replace=map_name_by_gdp_name)

In [None]:
# Interactive line chart with one Value-over-Year line per country name.
fig = ex.line(
    df_country,
    x="Year",
    y="Value",
    color="Country Name",
    line_group="Country Name",
    hover_name="Country Name",
)

# Chart title, added as an annotation positioned in paper coordinates just
# above the top-left corner of the plotting area.
title_annotation = dict(
    xref='paper',
    yref='paper',
    x=0.0,
    y=1.05,
    xanchor='left',
    yanchor='bottom',
    text='GDP over the years (1960 - 2016)',
    font=dict(family='Arial', size=30),
    showarrow=False,
)
annotations = [title_annotation]

fig.update_layout(annotations=annotations)
fig.show()

In [None]:
# Load the "naturalearth_lowres" sample dataset that ships with geopandas.
# NOTE(review): the geopandas.datasets module is deprecated in recent geopandas
# releases and is reportedly removed in 1.0 — confirm the installed version
# still provides it before relying on this cell.
world = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))

In [None]:
# Left-join the GDP rows onto the map features by name. Every map feature is
# kept; a feature with several matching GDP rows is repeated once per row.
df_country_final = world.merge(
    df_country,
    how="left",
    left_on="name",
    right_on="Country Name",
)

In [None]:
# Distinct names in the map dataset, useful for spotting spelling mismatches with the GDP table.
world.name.unique()

In [None]:
# Missing values per column after the merge.
df_country_final.isna().sum()

In [None]:
# (rows, columns) of the merged frame.
df_country_final.shape

In [None]:
# df_country_final holds one row per (map feature, year), so plotting it
# directly draws every country's polygon once per year, stacked on top of each
# other. Keep only the most recent year available for each feature so that
# every country is drawn exactly once and the colour has a defined meaning.
# Rows without a GDP match have no Year; sort_values puts them last and they
# survive as that feature's only row.
# NOTE(review): assumes the map dataset's 'name' values are unique — confirm.
gdp_latest_by_country = (
    df_country_final
    .sort_values("Year")
    .drop_duplicates(subset="name", keep="last")
)

ax = gdp_latest_by_country.plot(
    column="Value",
    figsize=(20, 14),
    legend=True,
    legend_kwds={"label": "Gdp By Countrie", "orientation": "horizontal"},
    # Draw countries that have no GDP value in grey instead of leaving them out.
    missing_kwds={"color": "lightgrey"},
)
ax.set_title("GDP by country (most recent year available)");

In [None]:
# Reload the full GDP table before building the single-country series.
gdp_csv_path = "/kaggle/input/country-regional-and-world-gdp/gdp_csv.csv"
gdp = pd.read_csv(gdp_csv_path)

In [None]:
# Build the United States series: parse Year into a datetime index named
# "Date", then keep only the rows for that country.
gdp = (
    gdp
    .assign(Date=pd.to_datetime(gdp.Year, format='%Y'))
    .set_index('Date')
    .loc[lambda frame: frame["Country Name"] == "United States"]
)
gdp.head()

In [None]:
# Fit an ARIMA model of order (3, 1, 3) to the Value series.
from statsmodels.tsa.arima_model import ARIMA

try:
    # Legacy estimator; disp=0 turns off the optimizer's convergence printout.
    model = ARIMA(gdp.Value, order=(3,1,3))
    result_AR = model.fit(disp = 0)
except NotImplementedError:
    # Current statsmodels releases keep the legacy class only as a stub that
    # raises NotImplementedError, so fall back to its replacement. The new
    # fit() takes no `disp` argument.
    from statsmodels.tsa.arima.model import ARIMA
    # trend="t" adds a drift term; the legacy estimator fitted a constant on
    # the differenced series by default, which plays the same role.
    model = ARIMA(gdp.Value, order=(3,1,3), trend="t")
    result_AR = model.fit()

Prediction for USA

In [None]:
# Plot the model's predictions from position 1 through position 80 of the
# series, then compute a 200-step-ahead forecast.
if hasattr(result_AR, "plot_predict"):
    # Legacy results objects expose plot_predict as a method.
    result_AR.plot_predict(1,80)
else:
    # Results from statsmodels.tsa.arima.model have no such method; the
    # equivalent plot is the module-level function in statsmodels.graphics.
    from statsmodels.graphics.tsaplots import plot_predict
    plot_predict(result_AR, 1, 80)
x=result_AR.forecast(steps=200)

In [None]:
# Reload the full GDP table before building the single-country series.
gdp_csv_path = "/kaggle/input/country-regional-and-world-gdp/gdp_csv.csv"
gdp = pd.read_csv(gdp_csv_path)

In [None]:
# Build the China series: parse Year into a datetime index named "Date",
# then keep only the rows for that country.
gdp = (
    gdp
    .assign(Date=pd.to_datetime(gdp.Year, format='%Y'))
    .set_index('Date')
    .loc[lambda frame: frame["Country Name"] == "China"]
)
gdp.head()

In [None]:
# Fit an ARIMA model of order (1, 1, 2) to the Value series.
from statsmodels.tsa.arima_model import ARIMA

try:
    # Legacy estimator; disp=0 turns off the optimizer's convergence printout.
    model = ARIMA(gdp.Value, order=(1,1,2))
    result_AR = model.fit(disp = 0)
except NotImplementedError:
    # Current statsmodels releases keep the legacy class only as a stub that
    # raises NotImplementedError, so fall back to its replacement. The new
    # fit() takes no `disp` argument.
    from statsmodels.tsa.arima.model import ARIMA
    # trend="t" adds a drift term; the legacy estimator fitted a constant on
    # the differenced series by default, which plays the same role.
    model = ARIMA(gdp.Value, order=(1,1,2), trend="t")
    result_AR = model.fit()

Prediction for China

In [None]:
# Plot the model's predictions from position 1 through position 80 of the
# series, then compute a 200-step-ahead forecast.
if hasattr(result_AR, "plot_predict"):
    # Legacy results objects expose plot_predict as a method.
    result_AR.plot_predict(1,80)
else:
    # Results from statsmodels.tsa.arima.model have no such method; the
    # equivalent plot is the module-level function in statsmodels.graphics.
    from statsmodels.graphics.tsaplots import plot_predict
    plot_predict(result_AR, 1, 80)
x=result_AR.forecast(steps=200)