In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly_express as px # data visualization
import seaborn as sns
import matplotlib.pyplot as plt
import os

# Columns expected in every input file, plus 'File' to record the source file.
BASE_COLUMNS = ['SeriesName', 'SeriesCode', 'CountryName', 'CountryCode', 'Year',
                'Value', 'File']

# Iterate over every file below the input directory and collect the frames in a
# list, so they are concatenated once at the end (concatenating inside the loop
# re-copies all previously loaded rows on every iteration).
frames = []
for dirname, dirnames, filenames in os.walk('/kaggle/input'):
    dirnames.sort()  # os.walk order is otherwise filesystem-dependent
    for filename in sorted(filenames):
        pathfile = os.path.join(dirname, filename)
        print(pathfile)
        df = pd.read_csv(pathfile, sep=';')
        # Tag each row with the name of the file it was read from.
        df['File'] = filename
        frames.append(df)

if frames:
    combined = pd.concat(frames)
    # Expected columns first, followed by any extra columns found in the inputs.
    extra_columns = [c for c in combined.columns if c not in BASE_COLUMNS]
    base_df = combined.reindex(columns=BASE_COLUMNS + extra_columns)
else:
    # No input files found: fall back to an empty frame with the expected columns.
    base_df = pd.DataFrame(columns=BASE_COLUMNS)


In [None]:
# Strip the '.csv' extension so 'File' holds only the dataset name.
# regex=False makes '.csv' match literally (the dot is not a wildcard).
base_df['File'] = base_df['File'].str.replace('.csv', '', regex=False)
base_df.head()

**The "Value" column contains several NaN values, so I define a function to better visualize where they occur.**

In [None]:
def missed_values_by_col(df, col, value_col='Value'):
    """Plot, per category of ``col``, the share of rows with a missing value.

    Two bar plots are drawn on the same axes: an outlined reference bar built
    from an all-True series (every row) and a red bar built from the null mask
    of ``value_col``. With seaborn's default estimator (the mean) the red bar
    is the fraction of rows in that category whose ``value_col`` is null.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing ``col`` and ``value_col``.
    col : str
        Column whose categories are placed on the x axis.
    value_col : str, default 'Value'
        Column whose missing values are visualized.
    """
    fig, ax = plt.subplots(figsize=(15, 5))

    null_values = df[value_col].isnull()
    # Every row counts towards the total, so the reference series is all True.
    total_values = pd.Series(True, index=df.index, name=value_col)

    # NOTE(review): `errcolor` is reported as deprecated in recent seaborn
    # releases in favour of `err_kws` -- confirm against the installed version.
    sns.barplot(x=df[col], y=total_values, ax=ax,
                linewidth=2.5, facecolor=(1, 1, 1, 0),
                errcolor=".2", edgecolor=".2")

    sns.barplot(x=df[col], y=null_values, ax=ax,
                linewidth=1.5,
                errcolor=".3", edgecolor=".3", color='r', alpha=.7)

    # Rotate the labels only after the bars exist, so the setting applies to
    # the ticks created by the plot rather than to the empty axes' defaults.
    if df[col].nunique(dropna=False) > 10:
        ax.tick_params(axis='x', rotation=90)

    ax.set_ylabel('Fraction of rows')
    ax.set_title('Missed Values of column "{}" by {}'.format(value_col, col))

In [None]:
# Show the missing "Value" entries broken down by input file, year and country.
for group_col in ('File', 'Year', 'CountryCode'):
    missed_values_by_col(base_df, group_col)

**There are many null values, so we now drop them and work only with the non-null values of "Value".**

In [None]:
# Keep only the rows that have a "Value". Restricting the check to that column
# avoids discarding rows merely because some other column is empty, and
# rebinding (instead of inplace=True) keeps the step explicit.
base_df = base_df.dropna(subset=['Value'])

In [None]:
# Total "Value" per input file, country and year, flattened back into columns.
group_keys = ['File', 'CountryCode', 'Year']
value_totals = base_df.groupby(group_keys)['Value'].sum()
gby = value_totals.reset_index()

In [None]:
# Drop the '_Data' marker from the file names so the legend labels are shorter.
# regex=False makes the pattern match literally.
gby['File'] = gby['File'].str.replace('_Data', '', regex=False)
gby.head()

In [None]:
def plot_line(df, ax, country='ALL'):
    """Draw "Value" against "Year" on ``ax``, one line per "File".

    Parameters
    ----------
    df : pandas.DataFrame
        Data with the columns "Year", "Value", "File" and "CountryCode".
    ax : matplotlib.axes.Axes
        Axes to draw on.
    country : str, default 'ALL'
        Value of "CountryCode" to keep; 'ALL' plots ``df`` unfiltered.
    """
    subset = df if country == 'ALL' else df[df['CountryCode'] == country]

    # NOTE(review): seaborn styles appear to be applied when axes are created,
    # so this call probably affects later figures rather than `ax` -- confirm.
    sns.set_style("whitegrid")
    sns.lineplot(x="Year", y="Value", hue="File", data=subset, ax=ax)
    ax.set_title(f'Values of {country}')

In [None]:
countrys = ['ALL', 'BRA', 'CHN', 'IND', 'RUS', 'ZAF']

# Set the style before the axes are created, so the figure looks the same on a
# fresh kernel as on a re-run of this cell.
sns.set_style("whitegrid")

# One stacked panel per entry; the layout follows the length of the list.
fig, axes = plt.subplots(len(countrys), 1, figsize=(10, 5 * len(countrys)))
axes = np.atleast_1d(axes)  # a single panel is returned as a bare Axes

for ax, c in zip(axes, countrys):
    plot_line(gby, ax, c)

