In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import plotly_express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np # linear algebra
import seaborn as sns



# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)
        
        

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
pip install pandas-alive

In [None]:
pip install bar_chart_race

In [None]:
# Animated bubble chart of plotly's bundled gapminder data: GDP per capita
# (log axis) against life expectancy, one frame per year, one bubble per country.
px.scatter(
    px.data.gapminder(),
    x="gdpPercap",
    y="lifeExp",
    animation_frame="year",
    animation_group="country",
    size="pop",
    color="country",
    hover_name="country",
    log_x=True,
    size_max=45,
    range_x=[100,100000],
    range_y=[25,90],
)

In [None]:

# Treemap of 2007 population by continent and country, coloured by life expectancy.
# The constant "world" column gives the hierarchy a single root node. It is added
# with .assign(), which returns a new frame, rather than by assigning into the
# result of .query(), which can raise SettingWithCopyWarning.
df = px.data.gapminder().query("year == 2007").assign(world="world")
fig = px.treemap(df, path=['world', 'continent', 'country'], values='pop',
                  color='lifeExp', hover_data=['iso_alpha'],
                  color_continuous_scale='RdBu',
                  # Centre the diverging colour scale on the population-weighted mean.
                  color_continuous_midpoint=np.average(df['lifeExp'], weights=df['pop']))
fig.show()

In [None]:
# Load the five-year gapminder table; this example plots only the rows in
# positions 750 up to (not including) 1500.
data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv')

start, end = 750, 1500
window = data.iloc[start:end]  # slice once instead of per column

# Marker size encodes GDP per capita, marker colour encodes life expectancy.
marker_style = dict(
    sizemode='diameter',
    sizeref=750,
    size=window['gdpPercap'],
    color=window['lifeExp'],
    colorscale='Viridis',
    colorbar_title='Life<br>Expectancy',
    line_color='rgb(140, 140, 170)',
)

points = go.Scatter3d(
    x=window['year'],
    y=window['continent'],
    z=window['pop'],
    text=window['country'],
    mode='markers',
    marker=marker_style,
)

fig = go.Figure(data=points)
fig.update_layout(
    height=800,
    width=800,
    title='Examining Population and Life Expectancy Over Time',
)

fig.show()

In [None]:
from IPython.display import display
import pandas_profiling

# Build a profiling report for the gapminder frame and render it in the notebook.
report = pandas_profiling.ProfileReport(data)
display(report)

In [None]:
# Animated bar chart of life expectancy per country, one frame per year,
# with the y axis pinned to 0-100 so bars are comparable across frames.
fig = px.bar(
    data,
    x="country",
    y="lifeExp",
    color="country",
    animation_frame="year",
    animation_group="country",
    range_y=[0,100],
)
fig.show()

In [None]:
# Same animated bar chart of life expectancy per country, but with no
# explicit range_y, so plotly chooses the y-axis range.
fig = px.bar(
    data,
    x="country",
    y="lifeExp",
    color="country",
    animation_frame="year",
    animation_group="country",
)
fig.show()

In [None]:
# Animated scatter of life expectancy against year, bubble size = life expectancy.
# NOTE(review): x is "year" on a log axis and is also the animation frame, so every
# frame places all points at a single x position — confirm this is intended.
px.scatter(
    data,
    x="year",
    y="lifeExp",
    animation_frame="year",
    animation_group="country",
    size="lifeExp",
    color="country",
    hover_name="country",
    log_x=True,
    size_max=55,
    range_x=[1950,2017],
    range_y=[1,110],
)


In [None]:
# Summary statistics for the numeric columns of the gapminder frame.
data.describe()

In [None]:
# Call the method: a bare `data.mean` only displays the bound-method repr.
# numeric_only=True skips non-numeric columns, which newer pandas would otherwise raise on.
data.mean(numeric_only=True)

In [None]:
# Finding correlations

# Correlate numeric columns only: pandas >= 2.0 raises on non-numeric columns
# instead of silently dropping them.
corr = data.select_dtypes(include="number").corr()

# Blank out the redundant upper triangle (diagonal included) of the symmetric matrix.
# The builtin `bool` replaces `np.bool`, which was removed in NumPy 1.24.
mask = np.triu(np.ones_like(corr, dtype=bool))
corr = corr.mask(mask)
fig = ff.create_annotated_heatmap(
    z=corr.to_numpy().round(2),
    x=list(corr.index.values),
    y=list(corr.columns.values),
    xgap=3, ygap=3,
    zmin=-1, zmax=1,
    colorscale='icefire',
    colorbar_thickness=30,
    colorbar_ticklen=3,
)
fig.update_layout(title_text='Correlation Matrix (impact relationship with numbers)',
                  title_x=0.5,
                  # `title_font` replaces the deprecated `titlefont` layout attribute.
                  title_font={'size': 20},
                  width=600, height=600,
                  xaxis_showgrid=False,
                  xaxis={'side': 'bottom'},
                  yaxis_showgrid=False,
                  yaxis_autorange='reversed',
                  paper_bgcolor=None,
                  template="simple_white"
                  )
fig.show()


In [None]:
sns.pairplot(data,palette='bright')
%time

In [None]:
sns.set(style="ticks", color_codes=True)
g = sns.pairplot(data, kind="reg", plot_kws={'line_kws':{'color':'red'}})
plt.show()
%time

# Year Correlations and Predictions

In [None]:
# Regression joint plot of gdpPercap against year.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="year", y="gdpPercap", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

In [None]:
# Regression joint plot of lifeExp against year.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="year", y="lifeExp", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

In [None]:
# Regression joint plot of pop against year.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="year", y="pop", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

# Population Correlations and Predictions

In [None]:
# Regression joint plot of year against pop.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="pop", y="year", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

In [None]:
# Regression joint plot of lifeExp against pop.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="pop", y="lifeExp", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

In [None]:
# Regression joint plot of gdpPercap against pop.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="pop", y="gdpPercap", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

# Life Expectancy Correlations and Predictions

In [None]:
# Regression joint plot of year against lifeExp.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="lifeExp", y="year", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

In [None]:
# Regression joint plot of pop against lifeExp.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="lifeExp", y="pop", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

In [None]:
# Regression joint plot of gdpPercap against lifeExp.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="lifeExp", y="gdpPercap", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

# GDP Per Capita Correlations and Predictions

In [None]:
# Regression joint plot of year against gdpPercap.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="gdpPercap", y="year", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

In [None]:
# Regression joint plot of pop against gdpPercap.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="gdpPercap", y="pop", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

In [None]:
# Regression joint plot of lifeExp against gdpPercap.
# `height` replaces the `size` keyword, which seaborn renamed.
sns.jointplot(x="gdpPercap", y="lifeExp", data=data, kind="reg", fit_reg=True, height=12)
plt.show()

data.describe()

In [None]:
# Call the method: a bare `data.describe` only displays the bound-method repr
# rather than the summary statistics.
data.describe()

# Distributions

In [None]:
# Bubble plot of GDP per capita against life expectancy on log-log axes:
# hue encodes the year, marker size encodes the population.
cmap = sns.cubehelix_palette(rot=-.9, as_cmap=True)
bubble_kwargs = dict(
    x="lifeExp",
    y="gdpPercap",
    hue="year",
    size="pop",
    palette=cmap,
    sizes=(40, 800),
)
g = sns.relplot(data=data, **bubble_kwargs)
g.set(xscale="log", yscale="log")
# Thin minor grid lines on both axes.
for axis in (g.ax.xaxis, g.ax.yaxis):
    axis.grid(True, "minor", linewidth=.25)
g.despine(left=True, bottom=True);

In [None]:
# One 30-bin histogram per numeric measure.
for column in ("gdpPercap", "pop", "lifeExp"):
    sns.displot(data, x=column, bins=30)


In [None]:
# One kernel density estimate per numeric measure.
for column in ("gdpPercap", "pop", "lifeExp"):
    sns.displot(data, x=column, kind="kde")


In [None]:
# Filled KDE of life expectancy with one curve per country.
# NOTE(review): hue="country" yields one density and one legend entry per
# country — confirm this is readable, or whether a coarser grouping was intended.
sns.displot(
    data,
    x="lifeExp",
    hue="country",
    kind="kde",
    fill=True,
);

In [None]:
# Bivariate histogram of GDP per capita against life expectancy.
sns.displot(
    data,
    x="lifeExp",
    y="gdpPercap",
);

In [None]:
# Bivariate KDE of GDP per capita against life expectancy.
# The trailing `;` suppresses the FacetGrid repr, matching the sibling displot cells.
sns.displot(data, x="lifeExp", y="gdpPercap", kind="kde");

In [None]:
# Bivariate histogram of GDP per capita against life expectancy, coloured by year.
sns.displot(
    data,
    x="lifeExp",
    y="gdpPercap",
    hue="year",
);

In [None]:
# Bivariate KDE of life expectancy against GDP per capita, coloured by year.
sns.displot(
    data,
    x="gdpPercap",
    y="lifeExp",
    hue="year",
    kind="kde",
);