In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the cleaned dataset from the working directory and print a summary of
# its columns and dtypes.
DATA_PATH = 'cleaned_data.csv'
df = pd.read_csv(DATA_PATH)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6361410 entries, 0 to 6361409
Data columns (total 14 columns):
 #   Column                                              Dtype  
---  ------                                              -----  
 0   Year                                                int64  
 1   Economy                                             int64  
 2   Economy Label                                       object 
 3   Sex                                                 int64  
 4   Sex Label                                           object 
 5   AgeClass                                            int64  
 6   AgeClass Label                                      object 
 7   Absolute value in thousands_x                       float64
 8   Series                                              float64
 9   Series Label                                        object 
 10  Persons per hundred persons aged 15-64              float64
 11  Annual average growth rate           

<b>Heatmap</b> <br>
<i>Illustrates the pairwise correlation between the numeric variables, highlighting which relationships are positive and which are negative.
Correlation values range from -1 to 1. A value close to 1 means the two variables are strongly positively correlated, a value close to -1 means they are strongly negatively correlated, and a value close to 0 means there is little or no linear relationship between them.</i>

In [None]:
# Collect the names of the columns stored as float64 or int64; these are the
# columns fed into the correlation matrix below.
numeric_dtypes = ['float64', 'int64']
numeric_frame = df.select_dtypes(include=numeric_dtypes)
numeric_columns = numeric_frame.columns
numeric_columns

In [None]:
# Pairwise Pearson correlation (the pandas default) between the selected
# numeric columns.
correlation_matrix = df.loc[:, numeric_columns].corr(method='pearson')

In [None]:
# Heatmap of the correlation matrix using Seaborn.
# An explicit Axes is created so the plot does not depend on pyplot's implicit
# "current figure" state.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    linewidths=0.5,
    # Pin the colour scale to the full correlation range so the diverging
    # colormap is centred on 0 (white = no correlation, red = positive,
    # blue = negative) instead of being stretched to the observed min/max.
    vmin=-1,
    vmax=1,
    ax=ax,
)
ax.set_title('Correlation Matrix Heatmap')
plt.show()

In [3]:
import plotly.express as px

<b>Interactive Line Plot with Plotly</b><br>
<i>Good for visualizing trends over time.</i>

In [None]:
# The frame holds several rows per (economy, year) because it is also broken
# down by sex, age class and series. Plotting it directly makes every line
# retrace the same years many times and pushes millions of points to the
# browser. Collapse to one value per economy and year first; groupby also
# returns the rows sorted by economy and then year, which px.line needs in
# order to draw each line left to right.
# NOTE(review): the mean assumes the growth rate is an economy/year-level
# figure that is merely repeated across the other breakdowns - confirm.
growth_col = 'Annual average growth rate'
growth_by_economy_year = (
    df.groupby(['Economy Label', 'Year'], as_index=False)[growth_col].mean()
)

fig = px.line(growth_by_economy_year, x='Year', y=growth_col, color='Economy Label',
              line_group='Economy Label', hover_name='Economy Label')
fig.update_layout(title='Annual Average Growth Rate Over Time',
                  xaxis_title='Year',
                  yaxis_title='Annual Average Growth Rate')
fig.show()


<b>Interactive Scatter Plot with plotly</b><br>
<i>This plot can reveal correlations or patterns and allows for easy exploration of specific points.</i>

In [None]:
# Only four columns are used by this figure (x, y, colour and hover data).
# The full frame repeats each combination once per sex / age class / series
# row, so plotting it directly draws the same marker many times over. Keep a
# single copy of each distinct point; the rendered positions are unchanged.
urban_col = 'Urban population as percentage of total population'
growth_col = 'Annual average growth rate'
scatter_points = (
    df[['Economy Label', 'Year', urban_col, growth_col]]
    .drop_duplicates()
    # Rows with a missing coordinate cannot be drawn, so drop them up front.
    .dropna(subset=[urban_col, growth_col])
)

fig = px.scatter(scatter_points, x=urban_col, y=growth_col,
                 color='Economy Label', hover_data=['Year', 'Economy Label'])
fig.update_layout(title='Urban Population vs. Annual Growth Rate',
                  xaxis_title='Urban Population as Percentage of Total Population',
                  yaxis_title='Annual Average Growth Rate')
fig.show()

<b>Interactive Bar Plot with Plotly</b><br>
<i>Effective for visualizing the frequency of categorical data, and interactivity allows users to explore specific economies.</i>

In [None]:
# Count how many rows belong to each economy before plotting.
# Passing y=df.index would stack the raw row-index values, so each bar would
# show the sum of row positions rather than the number of entries, and the
# figure would contain one bar segment per row of the frame.
economy_counts = (
    df.groupby('Economy Label')
    .size()
    .reset_index(name='Count')
)

fig = px.bar(economy_counts, x='Economy Label', y='Count', orientation='v',
             title='Count of Entries for Each Economy')
# 'total descending' orders the economies from most to fewest entries.
fig.update_layout(xaxis_title='Economy', yaxis_title='Count', xaxis={'categoryorder': 'total descending'})
fig.show()
