In [36]:
# Import Required Libraries
# Import all necessary libraries required for data analysis and machine learning modeling.

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns







In [37]:
# Set Display Options for Pandas
# Configure pandas to display all rows and columns for comprehensive data visualization.

pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', None)     # Show all rows


In [38]:
# Load and Inspect Data
# Load the data from the CSV files and inspect the first few rows to understand the data structure.
file_paths = [
    '../data/pivot01_age_structure_broad.csv',
    '../data/pivot03_migration_by_age.csv',
    '../data/pivot18_clean_citizenship_migrations.csv',
]

df1 = pd.read_csv(file_paths[0])
df3 = pd.read_csv(file_paths[1])
df18 = pd.read_csv(file_paths[2])

# Display the first few rows of df1 for inspection
df1_head = df1.head()
df1_head

# Display column names of df1
df1.columns




Index(['Unnamed: 0', 'year', 'sex', '0 - 14 years', '1 - 4 years',
       '15 - 24 years', '15 years and over', '25 - 44 years', '45 - 64 years',
       '65 years and over', 'All ages', 'Under 1 year'],
      dtype='object')

In [39]:
df1.head()

Unnamed: 0.1,Unnamed: 0,year,sex,0 - 14 years,1 - 4 years,15 - 24 years,15 years and over,25 - 44 years,45 - 64 years,65 years and over,All ages,Under 1 year
0,0,1950,Both sexes,851.2,249.1,452.6,2117.8,773.6,574.5,317.1,2969.0,61.1
1,1,1950,Female,416.6,121.6,217.7,1042.4,380.2,283.2,161.3,1459.0,29.7
2,2,1950,Male,434.6,127.5,234.9,1075.4,393.4,291.3,155.8,1510.0,31.4
3,3,1951,Both sexes,854.8,249.3,443.4,2105.8,771.2,574.8,316.4,2960.6,63.6
4,4,1951,Female,418.4,121.7,212.2,1035.6,378.9,283.1,161.4,1454.0,30.9


In [40]:


# Check and drop the 'Under 1 year' and '1 - 4 years' columns if they exist
if 'Under 1 year' in df1.columns:
    df1 = df1.drop(['Under 1 year'], axis=1)

if '1 - 4 years' in df1.columns:
    df1 = df1.drop(['1 - 4 years'], axis=1)

# Check if the 'sex' column exists and needs encoding
if 'sex' in df1.columns and 'sex_Female' not in df1.columns and 'sex_Male' not in df1.columns:
    df1 = pd.get_dummies(df1, columns=['sex'])

# Display the columns after modifications
df1.columns


Index(['Unnamed: 0', 'year', '0 - 14 years', '15 - 24 years',
       '15 years and over', '25 - 44 years', '45 - 64 years',
       '65 years and over', 'All ages', 'sex_Both sexes', 'sex_Female',
       'sex_Male'],
      dtype='object')

In [41]:
df1.head()

Unnamed: 0.1,Unnamed: 0,year,0 - 14 years,15 - 24 years,15 years and over,25 - 44 years,45 - 64 years,65 years and over,All ages,sex_Both sexes,sex_Female,sex_Male
0,0,1950,851.2,452.6,2117.8,773.6,574.5,317.1,2969.0,1,0,0
1,1,1950,416.6,217.7,1042.4,380.2,283.2,161.3,1459.0,0,1,0
2,2,1950,434.6,234.9,1075.4,393.4,291.3,155.8,1510.0,0,0,1
3,3,1951,854.8,443.4,2105.8,771.2,574.8,316.4,2960.6,1,0,0
4,4,1951,418.4,212.2,1035.6,378.9,283.1,161.4,1454.0,0,1,0


In [42]:
df1.head()

Unnamed: 0.1,Unnamed: 0,year,0 - 14 years,15 - 24 years,15 years and over,25 - 44 years,45 - 64 years,65 years and over,All ages,sex_Both sexes,sex_Female,sex_Male
0,0,1950,851.2,452.6,2117.8,773.6,574.5,317.1,2969.0,1,0,0
1,1,1950,416.6,217.7,1042.4,380.2,283.2,161.3,1459.0,0,1,0
2,2,1950,434.6,234.9,1075.4,393.4,291.3,155.8,1510.0,0,0,1
3,3,1951,854.8,443.4,2105.8,771.2,574.8,316.4,2960.6,1,0,0
4,4,1951,418.4,212.2,1035.6,378.9,283.1,161.4,1454.0,0,1,0


In [43]:
df3.head()

Unnamed: 0,year,age_group,sex,emigrants,immigrants,migration
0,1987,0 - 14 years,Both sexes,2.8,3.1,0.3
1,1987,0 - 14 years,Female,1.3,1.8,0.5
2,1987,0 - 14 years,Male,1.4,1.3,-0.1
3,1987,15 - 24 years,Both sexes,24.0,5.1,-18.9
4,1987,15 - 24 years,Female,11.7,3.2,-8.5


In [44]:
df3.age_group.unique()

array(['0 - 14 years', '15 - 24 years', '25 - 44 years', '45 - 64 years',
       '65 years and over', 'All ages'], dtype=object)

In [45]:
# Pivoting separately for 'emigrants', 'immigrants', 'migration'
pivot_emigrants = df3.pivot_table(index='year', columns='age_group', values='emigrants', aggfunc='sum')
pivot_immigrants = df3.pivot_table(index='year', columns='age_group', values='immigrants', aggfunc='sum')
pivot_migration = df3.pivot_table(index='year', columns='age_group', values='migration', aggfunc='sum')

# Concatenate the pivot tables
df3 = pd.concat([pivot_emigrants.add_suffix('_emigrants'), 
                                pivot_immigrants.add_suffix('_immigrants'), 
                                pivot_migration.add_suffix('_migration')], axis=1).reset_index()


df3.to_csv('../data/pivot18_concatenated.csv', index=False)
df3.head()

age_group,year,0 - 14 years_emigrants,15 - 24 years_emigrants,25 - 44 years_emigrants,45 - 64 years_emigrants,65 years and over_emigrants,All ages_emigrants,0 - 14 years_immigrants,15 - 24 years_immigrants,25 - 44 years_immigrants,45 - 64 years_immigrants,65 years and over_immigrants,All ages_immigrants,0 - 14 years_migration,15 - 24 years_migration,25 - 44 years_migration,45 - 64 years_migration,65 years and over_migration,All ages_migration
0,1987,5.5,48.0,23.6,3.4,0.0,40.2,6.2,10.2,12.2,3.6,2.2,17.2,0.7,-37.8,-11.4,0.2,2.2,-23.0
1,1988,16.6,62.5,36.6,6.4,0.0,61.1,6.0,10.8,14.4,4.3,2.8,19.2,-10.6,-51.7,-22.2,-2.1,2.8,-41.9
2,1989,15.6,74.0,43.8,7.5,0.0,70.6,8.8,15.4,21.1,4.4,3.7,26.7,-6.8,-58.6,-22.7,-3.1,3.7,-43.9
3,1990,13.4,61.6,33.8,3.8,0.0,56.3,10.4,20.2,27.9,5.4,2.7,33.3,-3.0,-41.4,-5.9,1.6,2.7,-22.9
4,1991,9.2,39.8,21.0,0.8,0.0,35.3,10.5,18.6,29.1,5.0,3.4,33.3,1.3,-21.2,8.1,4.2,3.4,-2.0


In [46]:
df18.head()

Unnamed: 0.1,Unnamed: 0,index,year,sex,endpoint,All countries,Australia,Canada,EU14_excl_Irl_UK,EU15_to_27,other_countries_23,UK,USA
0,0,0,1987,Both sexes,Emigrants: All destinations,40.2,10.4,1.1,3.1,0.0,5.4,21.8,9.9
1,1,1,1987,Both sexes,Immigrants: All origins,17.2,10.4,1.1,2.2,0.0,4.0,8.1,3.0
2,2,2,1987,Both sexes,Net migration,-23.0,10.4,1.1,-0.9,0.0,-1.4,-13.7,-6.9
3,3,3,1987,Female,Emigrants: All destinations,-20.366667,10.4,1.1,1.9,0.0,2.8,8.7,5.2
4,4,4,1987,Female,Immigrants: All origins,-17.733333,10.4,1.1,1.2,0.0,2.2,4.0,1.7


In [47]:
df18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Unnamed: 0          333 non-null    int64  
 1   index               333 non-null    int64  
 2   year                333 non-null    int64  
 3   sex                 333 non-null    object 
 4   endpoint            333 non-null    object 
 5   All countries       333 non-null    float64
 6   Australia           333 non-null    float64
 7   Canada              333 non-null    float64
 8   EU14_excl_Irl_UK    333 non-null    float64
 9   EU15_to_27          333 non-null    float64
 10  other_countries_23  333 non-null    float64
 11  UK                  333 non-null    float64
 12  USA                 333 non-null    float64
dtypes: float64(8), int64(3), object(2)
memory usage: 33.9+ KB


In [48]:
# Apply one-hot encoding to both 'sex' and 'endpoint' columns
df18 = pd.get_dummies(df18, columns=['sex', 'endpoint'])

# Display the first few rows to inspect the changes
df18.head()



Unnamed: 0.1,Unnamed: 0,index,year,All countries,Australia,Canada,EU14_excl_Irl_UK,EU15_to_27,other_countries_23,UK,USA,sex_Both sexes,sex_Female,sex_Male,endpoint_Emigrants: All destinations,endpoint_Immigrants: All origins,endpoint_Net migration
0,0,0,1987,40.2,10.4,1.1,3.1,0.0,5.4,21.8,9.9,1,0,0,1,0,0
1,1,1,1987,17.2,10.4,1.1,2.2,0.0,4.0,8.1,3.0,1,0,0,0,1,0
2,2,2,1987,-23.0,10.4,1.1,-0.9,0.0,-1.4,-13.7,-6.9,1,0,0,0,0,1
3,3,3,1987,-20.366667,10.4,1.1,1.9,0.0,2.8,8.7,5.2,0,1,0,1,0,0
4,4,4,1987,-17.733333,10.4,1.1,1.2,0.0,2.2,4.0,1.7,0,1,0,0,1,0


In [49]:
# Save the DataFrame to CSV
df1.to_csv('../data/ml_01_age-structure.csv', index=False)
df3.to_csv('../data/ml_03_migration_by_age.csv', index=False)
df18.to_csv('../data/ml_18_citizen_migration.csv', index=False)

In [50]:
# Merging df1 and df3 on 'year'
merged_df = pd.merge(df1, df3, on='year', how='inner', suffixes=('_df1', '_df3'))

# Merging the above result with df18
df = pd.merge(merged_df, df18, on='year', how='inner')
df.to_csv('../data/ml_merge.csv', index=False)

In [51]:
df.head()

Unnamed: 0,Unnamed: 0_x,year,0 - 14 years,15 - 24 years,15 years and over,25 - 44 years,45 - 64 years,65 years and over,All ages,sex_Both sexes_x,sex_Female_x,sex_Male_x,0 - 14 years_emigrants,15 - 24 years_emigrants,25 - 44 years_emigrants,45 - 64 years_emigrants,65 years and over_emigrants,All ages_emigrants,0 - 14 years_immigrants,15 - 24 years_immigrants,25 - 44 years_immigrants,45 - 64 years_immigrants,65 years and over_immigrants,All ages_immigrants,0 - 14 years_migration,15 - 24 years_migration,25 - 44 years_migration,45 - 64 years_migration,65 years and over_migration,All ages_migration,Unnamed: 0_y,index,All countries,Australia,Canada,EU14_excl_Irl_UK,EU15_to_27,other_countries_23,UK,USA,sex_Both sexes_y,sex_Female_y,sex_Male_y,endpoint_Emigrants: All destinations,endpoint_Immigrants: All origins,endpoint_Net migration
0,111,1987,1014.4,615.8,2532.1,936.0,592.1,388.2,3546.5,1,0,0,5.5,48.0,23.6,3.4,0.0,40.2,6.2,10.2,12.2,3.6,2.2,17.2,0.7,-37.8,-11.4,0.2,2.2,-23.0,0,0,40.2,10.4,1.1,3.1,0.0,5.4,21.8,9.9,1,0,0,1,0,0
1,111,1987,1014.4,615.8,2532.1,936.0,592.1,388.2,3546.5,1,0,0,5.5,48.0,23.6,3.4,0.0,40.2,6.2,10.2,12.2,3.6,2.2,17.2,0.7,-37.8,-11.4,0.2,2.2,-23.0,1,1,17.2,10.4,1.1,2.2,0.0,4.0,8.1,3.0,1,0,0,0,1,0
2,111,1987,1014.4,615.8,2532.1,936.0,592.1,388.2,3546.5,1,0,0,5.5,48.0,23.6,3.4,0.0,40.2,6.2,10.2,12.2,3.6,2.2,17.2,0.7,-37.8,-11.4,0.2,2.2,-23.0,2,2,-23.0,10.4,1.1,-0.9,0.0,-1.4,-13.7,-6.9,1,0,0,0,0,1
3,111,1987,1014.4,615.8,2532.1,936.0,592.1,388.2,3546.5,1,0,0,5.5,48.0,23.6,3.4,0.0,40.2,6.2,10.2,12.2,3.6,2.2,17.2,0.7,-37.8,-11.4,0.2,2.2,-23.0,3,3,-20.366667,10.4,1.1,1.9,0.0,2.8,8.7,5.2,0,1,0,1,0,0
4,111,1987,1014.4,615.8,2532.1,936.0,592.1,388.2,3546.5,1,0,0,5.5,48.0,23.6,3.4,0.0,40.2,6.2,10.2,12.2,3.6,2.2,17.2,0.7,-37.8,-11.4,0.2,2.2,-23.0,4,4,-17.733333,10.4,1.1,1.2,0.0,2.2,4.0,1.7,0,1,0,0,1,0


In [52]:
# Choose a target variable
target = 'All ages_migration'  # Replace with your chosen target variable

# Prepare the feature matrix (X) and target vector (y)
X = df.drop(target, axis=1)
y = df[target]

# Split the dataset into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train your regression model (e.g., Linear Regression)
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = model.predict(X_test)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")


Mean Squared Error: 0.0011204134485451858


In [53]:
# Assuming you have a trained model 'model' and a test set (X_test, y_test)

# Predict future values (for the test set in this case)
future_predictions = model.predict(X_test)

# Evaluate the predictions
mse = mean_squared_error(y_test, future_predictions)
print(f"Future Prediction Mean Squared Error: {mse}")


Future Prediction Mean Squared Error: 0.0011204134485451858


In [54]:
df.columns

Index(['Unnamed: 0_x', 'year', '0 - 14 years', '15 - 24 years',
       '15 years and over', '25 - 44 years', '45 - 64 years',
       '65 years and over', 'All ages', 'sex_Both sexes_x', 'sex_Female_x',
       'sex_Male_x', '0 - 14 years_emigrants', '15 - 24 years_emigrants',
       '25 - 44 years_emigrants', '45 - 64 years_emigrants',
       '65 years and over_emigrants', 'All ages_emigrants',
       '0 - 14 years_immigrants', '15 - 24 years_immigrants',
       '25 - 44 years_immigrants', '45 - 64 years_immigrants',
       '65 years and over_immigrants', 'All ages_immigrants',
       '0 - 14 years_migration', '15 - 24 years_migration',
       '25 - 44 years_migration', '45 - 64 years_migration',
       '65 years and over_migration', 'All ages_migration', 'Unnamed: 0_y',
       'index', 'All countries', 'Australia', 'Canada', 'EU14_excl_Irl_UK',
       'EU15_to_27', 'other_countries_23', 'UK', 'USA', 'sex_Both sexes_y',
       'sex_Female_y', 'sex_Male_y', 'endpoint_Emigrants: All desti

In [55]:
# Choose a target variable
target = 'other_countries_23'  # Replace with your chosen target variable

# Prepare the feature matrix (X) and target vector (y)
X = df.drop(target, axis=1)
y = df[target]

# Split the dataset into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train your regression model (e.g., Linear Regression)
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = model.predict(X_test)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")


Mean Squared Error: 26.540020690917967


In [56]:
# Select your features and target variable
X = df.drop('other_countries_23', axis=1)  # Replace 'target_variable' with your actual target variable
y = df['other_countries_23']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train your model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions (assuming 'other_countries_23' is part of your features)
# Here, I'm using X_test for demonstration; replace it with your new data if available
predictions = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 26.540020690917967


In [33]:
df18

Unnamed: 0.1,Unnamed: 0,index,year,All countries,Australia,Canada,EU14_excl_Irl_UK,EU15_to_27,other_countries_23,UK,USA,sex_Both sexes,sex_Female,sex_Male,endpoint_Emigrants: All destinations,endpoint_Immigrants: All origins,endpoint_Net migration
0,0,0,1987,40.2,10.4,1.1,3.1,0.0,5.4,21.8,9.9,1,0,0,1,0,0
1,1,1,1987,17.2,10.4,1.1,2.2,0.0,4.0,8.1,3.0,1,0,0,0,1,0
2,2,2,1987,-23.0,10.4,1.1,-0.9,0.0,-1.4,-13.7,-6.9,1,0,0,0,0,1
3,3,3,1987,-20.366667,10.4,1.1,1.9,0.0,2.8,8.7,5.2,0,1,0,1,0,0
4,4,4,1987,-17.733333,10.4,1.1,1.2,0.0,2.2,4.0,1.7,0,1,0,0,1,0
5,5,5,1987,-15.1,10.4,1.1,-7.5,0.0,0.6,-4.7,-3.5,0,1,0,0,0,1
6,6,6,1987,-14.6,10.4,1.1,1.2,0.0,2.6,13.1,4.8,0,0,1,1,0,0
7,7,7,1987,-14.1,10.4,1.1,1.0,0.0,1.8,4.1,1.2,0,0,1,0,1,0
8,8,8,1987,-13.6,10.4,1.1,-0.2,0.0,-0.8,-9.0,-3.6,0,0,1,0,0,1
9,9,9,1988,61.1,10.4,1.1,2.8,0.0,10.2,40.2,7.9,1,0,0,1,0,0


In [35]:

# Prepare the feature matrix (X) and target vector (y)
X_df18 = df18.drop('other_countries_23', axis=1)  # Drop other unwanted columns as necessary
y_df18 = df18['other_countries_23']

# Split the dataset
X_train_df18, X_test_df18, y_train_df18, y_test_df18 = train_test_split(X_df18, y_df18, test_size=0.2, random_state=42)

# Train the model
model_df18 = LinearRegression()
model_df18.fit(X_train_df18, y_train_df18)

# Make predictions
predictions_df18 = model_df18.predict(X_test_df18)

# Evaluate the model
mse_df18 = mean_squared_error(y_test_df18, predictions_df18)
print(f"Mean Squared Error: {mse_df18}")


Mean Squared Error: 39.90053952083427


In [58]:


# Filter the DataFrame to include only rows where 'endpoint_Net migration' is 1
df18_filtered = df18[df18['endpoint_Net migration'] == 1]

# Drop the 'endpoint_Net migration' column as it's no longer needed for modeling
df18_filtered = df18_filtered.drop('endpoint_Net migration', axis=1)

# Select features and target variable
X_df18_filtered = df18_filtered.drop('other_countries_23', axis=1)
y_df18_filtered = df18_filtered['other_countries_23']

# Split the data into training and testing sets
X_train_df18, X_test_df18, y_train_df18, y_test_df18 = train_test_split(X_df18_filtered, y_df18_filtered, test_size=0.2, random_state=42)

# Train the model
model_df18 = LinearRegression()
model_df18.fit(X_train_df18, y_train_df18)

# Make predictions and evaluate the model
predictions_df18 = model_df18.predict(X_test_df18)
mse_df18 = mean_squared_error(y_test_df18, predictions_df18)
print(f"Mean Squared Error: {mse_df18}")


Mean Squared Error: 2.832563960863196


In [61]:
df.columns


Index(['Unnamed: 0_x', 'year', '0 - 14 years', '15 - 24 years',
       '15 years and over', '25 - 44 years', '45 - 64 years',
       '65 years and over', 'All ages', 'sex_Both sexes_x', 'sex_Female_x',
       'sex_Male_x', '0 - 14 years_emigrants', '15 - 24 years_emigrants',
       '25 - 44 years_emigrants', '45 - 64 years_emigrants',
       '65 years and over_emigrants', 'All ages_emigrants',
       '0 - 14 years_immigrants', '15 - 24 years_immigrants',
       '25 - 44 years_immigrants', '45 - 64 years_immigrants',
       '65 years and over_immigrants', 'All ages_immigrants',
       '0 - 14 years_migration', '15 - 24 years_migration',
       '25 - 44 years_migration', '45 - 64 years_migration',
       '65 years and over_migration', 'All ages_migration', 'Unnamed: 0_y',
       'index', 'All countries', 'Australia', 'Canada', 'EU14_excl_Irl_UK',
       'EU15_to_27', 'other_countries_23', 'UK', 'USA', 'sex_Both sexes_y',
       'sex_Female_y', 'sex_Male_y', 'endpoint_Emigrants: All desti

In [62]:
# Basic Descriptive Statistics
df.describe()

Unnamed: 0,Unnamed: 0_x,year,0 - 14 years,15 - 24 years,15 years and over,25 - 44 years,45 - 64 years,65 years and over,All ages,sex_Both sexes_x,sex_Female_x,sex_Male_x,0 - 14 years_emigrants,15 - 24 years_emigrants,25 - 44 years_emigrants,45 - 64 years_emigrants,65 years and over_emigrants,All ages_emigrants,0 - 14 years_immigrants,15 - 24 years_immigrants,25 - 44 years_immigrants,45 - 64 years_immigrants,65 years and over_immigrants,All ages_immigrants,0 - 14 years_migration,15 - 24 years_migration,25 - 44 years_migration,45 - 64 years_migration,65 years and over_migration,All ages_migration,Unnamed: 0_y,index,All countries,Australia,Canada,EU14_excl_Irl_UK,EU15_to_27,other_countries_23,UK,USA,sex_Both sexes_y,sex_Female_y,sex_Male_y,endpoint_Emigrants: All destinations,endpoint_Immigrants: All origins,endpoint_Net migration
count,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0,999.0
mean,166.0,2005.0,621.544144,413.5,2182.875676,820.585586,607.674775,341.116216,2804.405405,0.333333,0.333333,0.333333,8.02973,43.0,38.351351,5.42973,2.12973,81.52973,15.816216,37.735135,61.832432,11.178378,4.224324,115.759459,7.805405,-5.256757,23.481081,5.756757,2.086486,34.227027,166.0,166.0,26.453453,7.114715,1.15015,4.411712,4.462462,8.508709,7.472372,2.18018,0.333333,0.333333,0.333333,0.333333,0.333333,0.333333
std,32.057688,10.682426,225.595711,147.751167,858.17121,322.609134,267.774851,150.256485,1070.20829,0.471641,0.471641,0.471641,6.812104,11.100089,23.087175,4.445615,1.99204,46.025062,7.636878,22.284813,33.493691,6.921953,2.399557,77.767516,9.734181,28.389616,32.909651,6.312807,2.14573,63.739135,96.176535,96.176535,29.893939,4.653623,1.219842,4.82508,9.459204,9.682062,8.292445,2.450416,0.471641,0.471641,0.471641,0.471641,0.471641,0.471641
min,111.0,1987.0,402.6,277.3,1244.6,465.2,295.8,169.7,1742.8,0.0,0.0,0.0,0.0,23.6,6.2,0.0,0.0,25.3,3.7,10.2,12.2,3.6,1.4,17.2,-10.6,-58.6,-35.4,-6.4,-4.2,-54.9,0.0,0.0,-43.9,-12.0,-3.9,-17.7,-7.7,-6.8,-34.2,-6.9,0.0,0.0,0.0,0.0,0.0,0.0
25%,138.0,1996.0,451.3,307.8,1488.2,556.2,396.9,229.2,1906.6,0.0,0.0,0.0,2.7,35.6,17.6,1.6,0.0,35.1,11.1,24.8,33.7,6.6,2.4,40.7,0.7,-21.1,-5.9,0.6,0.7,-4.7,83.0,83.0,12.5,3.4,1.1,2.1,0.0,3.1,4.1,0.9,0.0,0.0,0.0,0.0,0.0,0.0
50%,166.0,2005.0,495.1,323.1,1836.0,716.1,544.3,298.4,2328.5,0.0,0.0,0.0,4.9,42.1,36.6,4.9,2.0,70.6,14.0,34.6,55.4,8.6,3.4,117.0,8.9,-9.1,27.0,5.8,2.1,17.4,166.0,166.0,26.0,10.4,1.1,4.0,0.0,6.1,7.5,2.3,0.0,0.0,0.0,0.0,0.0,0.0
75%,194.0,2014.0,859.4,601.6,2766.7,1016.1,703.8,416.3,3626.1,1.0,1.0,1.0,12.0,47.8,62.5,7.5,3.4,112.2,19.8,43.6,89.4,14.2,6.0,169.2,12.8,9.2,39.9,8.7,3.0,82.6,249.0,249.0,41.3,10.4,1.1,6.7,5.3,10.9,10.6,3.4,1.0,1.0,1.0,1.0,1.0,1.0
max,221.0,2023.0,1015.5,669.2,4270.0,1457.7,1338.7,806.3,5281.6,1.0,1.0,1.0,22.9,74.0,77.0,15.2,6.8,166.0,43.7,122.8,150.1,33.8,9.3,302.2,32.7,86.2,89.2,23.4,6.8,209.5,332.0,332.0,151.1,17.4,7.2,20.3,72.6,75.8,48.4,9.9,1.0,1.0,1.0,1.0,1.0,1.0


In [63]:
# Correlation Matrix
df.corr()

Unnamed: 0,Unnamed: 0_x,year,0 - 14 years,15 - 24 years,15 years and over,25 - 44 years,45 - 64 years,65 years and over,All ages,sex_Both sexes_x,sex_Female_x,sex_Male_x,0 - 14 years_emigrants,15 - 24 years_emigrants,25 - 44 years_emigrants,45 - 64 years_emigrants,65 years and over_emigrants,All ages_emigrants,0 - 14 years_immigrants,15 - 24 years_immigrants,25 - 44 years_immigrants,45 - 64 years_immigrants,65 years and over_immigrants,All ages_immigrants,0 - 14 years_migration,15 - 24 years_migration,25 - 44 years_migration,45 - 64 years_migration,65 years and over_migration,All ages_migration,Unnamed: 0_y,index,All countries,Australia,Canada,EU14_excl_Irl_UK,EU15_to_27,other_countries_23,UK,USA,sex_Both sexes_y,sex_Female_y,sex_Male_y,endpoint_Emigrants: All destinations,endpoint_Immigrants: All origins,endpoint_Net migration
Unnamed: 0_x,1.0,0.9996753,0.07801927,-0.02388507,0.3869978,0.3641683,0.5421237,0.4857354,0.3267746,-0.02206837,2.808838e-16,0.02206837,0.3861876,-0.4440121,0.666014,0.6694991,0.7126147,0.7594495,0.6648583,0.4481426,0.80129,0.7509739,0.77982,0.8068289,0.2523649,0.5261295,0.3485648,0.353687,0.2049496,0.4358703,0.9993146,0.9993146,0.4957164,-0.6942317,0.1261201,0.3713359,0.2600817,0.4979517,0.0178945,0.06969478,-2.578104e-16,-1.617495e-16,-2.632256e-16,-4.744182e-16,7.934438000000001e-17,6.5924110000000005e-18
year,0.9996753,1.0,0.09902063,-0.002394577,0.4073024,0.3843333,0.5600086,0.505401,0.3474832,1.543479e-15,8.302071e-18,-1.528641e-15,0.3863131,-0.4441564,0.6662303,0.6697166,0.7128462,0.7596962,0.6650743,0.4482882,0.8015503,0.7512179,0.7800733,0.807091,0.2524468,0.5263004,0.348678,0.3538019,0.2050162,0.4360119,0.9996392,0.9996392,0.4958775,-0.6944572,0.1261611,0.3714565,0.2601662,0.4981135,0.01790031,0.06971742,-3.536329e-16,1.012146e-16,-4.225224e-16,-2.60897e-16,-2.1550060000000002e-17,-9.856501e-17
0 - 14 years,0.07801927,0.09902063,1.0,0.9508855,0.9245597,0.9155043,0.8515295,0.8622607,0.9521783,0.9745713,-0.5238536,-0.4507177,0.1558935,0.03179118,0.1577554,0.1572395,0.06701614,0.1418872,0.04586548,-0.01374962,0.04664755,0.07498254,0.1454081,0.05892789,-0.07315496,-0.02302457,-0.063106,-0.02836958,0.09935684,-0.03055402,0.09898491,0.09898491,0.0069724,-0.1232492,0.01897184,0.0007584396,0.00142215,0.03884754,-0.01417389,-0.01019217,-1.599247e-17,-4.944952e-17,-5.1523850000000006e-17,3.546447e-18,3.352396e-17,-1.8200630000000002e-17
15 - 24 years,-0.02388507,-0.002394577,0.9508855,1.0,0.8918186,0.8881272,0.7941653,0.7879646,0.9155665,0.9899652,-0.5191565,-0.4708087,-0.09777774,-0.04018795,-0.06206695,-0.04978075,-0.007285176,-0.05625521,0.04673882,0.05303775,0.03747316,0.04497825,-0.01888704,0.03475699,0.1051102,0.0573287,0.08170867,0.08486263,-0.01422449,0.0830397,-0.002393713,-0.002393713,0.0296723,0.03232916,0.01316438,0.01588596,0.02608667,0.03148,0.01000987,0.008071483,2.145541e-18,2.155758e-17,2.5542150000000002e-17,-3.167227e-18,-4.03566e-18,1.542746e-17
15 years and over,0.3869978,0.4073024,0.9245597,0.8918186,1.0,0.9905223,0.981111,0.9591987,0.9967689,0.8997573,-0.428389,-0.4713683,0.1537529,-0.1666676,0.2848431,0.2856643,0.3023839,0.3210557,0.2793533,0.2050791,0.3351419,0.3152343,0.3240134,0.3454921,0.1119719,0.2264284,0.1413793,0.1453357,0.07927313,0.1896533,0.4071554,0.4071554,0.2048529,-0.288212,0.05135507,0.1513683,0.124148,0.2106609,0.002640597,0.0225486,-1.068086e-16,-5.48643e-17,-7.519875000000001e-17,-1.808992e-16,4.292925e-17,1.5813730000000002e-17
25 - 44 years,0.3641683,0.3843333,0.9155043,0.8881272,0.9905223,1.0,0.9638824,0.9190465,0.987259,0.8997515,-0.4373642,-0.4623873,0.1492061,-0.1416001,0.3076913,0.2763778,0.3332372,0.34867,0.2286947,0.2558498,0.3116915,0.24824,0.2861018,0.3433968,0.07549698,0.2563376,0.1014084,0.07814296,0.008913248,0.1671441,0.3841946,0.3841946,0.2027383,-0.2815109,0.01520865,0.1607691,0.163401,0.1728544,-0.00722639,0.0107616,-9.208669000000001e-17,-1.483307e-17,-4.332941e-17,-1.605902e-16,4.9459160000000004e-17,2.5642030000000003e-17
45 - 64 years,0.5421237,0.5600086,0.8515295,0.7941653,0.981111,0.9638824,1.0,0.9708811,0.9662298,0.802706,-0.4021242,-0.4005818,0.2252559,-0.2328641,0.3835735,0.3925182,0.399248,0.4324024,0.3809777,0.2462827,0.4519016,0.4345484,0.4482414,0.4570962,0.141785,0.2848011,0.1910032,0.2011441,0.127256,0.2453981,0.5598066,0.5598066,0.2727027,-0.3953553,0.07719502,0.2008365,0.145675,0.2886024,0.005936487,0.03487297,-1.478407e-16,-8.825346000000001e-17,-1.295474e-16,-2.560788e-16,5.411902e-17,1.448811e-17
65 years and over,0.4857354,0.505401,0.8622607,0.7879646,0.9591987,0.9190465,0.9708811,1.0,0.9509217,0.8030037,-0.280478,-0.5225257,0.2524958,-0.1933336,0.3436506,0.3875755,0.3071435,0.369761,0.3794913,0.130784,0.4026332,0.4487624,0.4560209,0.3870779,0.1213284,0.1788178,0.168941,0.2203466,0.2208614,0.2052372,0.5052186,0.5052186,0.2194888,-0.3688758,0.1101483,0.145776,0.07287761,0.286721,0.01017143,0.03559247,-1.212363e-16,-1.329656e-16,-1.819925e-16,-2.19893e-16,4.3375810000000004e-17,-1.486883e-17
All ages,0.3267746,0.3474832,0.9521783,0.9155665,0.9967689,0.987259,0.9662298,0.9509217,1.0,0.9269258,-0.4539447,-0.4729811,0.1561629,-0.126933,0.2616752,0.262221,0.2565974,0.2873647,0.2336733,0.1615406,0.2785732,0.2685886,0.290479,0.2894608,0.07435825,0.176702,0.1000555,0.1105593,0.08452472,0.1456282,0.3473578,0.3473578,0.1657312,-0.2570992,0.04518089,0.1215355,0.0998438,0.1771151,-0.0008724459,0.01593177,-7.261372000000001e-17,-6.474300000000001e-17,-7.955349e-17,-1.428013e-16,4.4121160000000003e-17,6.714089e-18
sex_Both sexes_x,-0.02206837,1.543479e-15,0.9745713,0.9899652,0.8997573,0.8997515,0.802706,0.8030037,0.9269258,1.0,-0.5,-0.5,-1.821611e-16,9.455881000000001e-17,-2.932514e-16,-2.619918e-16,-1.980843e-16,-2.796893e-16,-2.238571e-16,-1.405586e-16,-2.458557e-16,-2.655148e-16,-3.053084e-16,-2.808787e-16,-5.737871e-17,-1.197051e-16,-7.751966000000001e-17,-1.165737e-16,-1.435611e-16,-1.267057e-16,-2.503456e-16,-2.503456e-16,-1.119139e-16,2.386238e-16,-7.192965e-17,-7.085691000000001e-17,-9.475399000000001e-17,-1.74427e-16,1.399429e-17,3.850248e-18,4.87598e-18,1.875377e-18,7.876582e-18,1.775357e-17,8.06412e-18,7.626532e-18


In [None]:
# Visualization: Correlation Heatmap
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix Heatmap')
plt.show()

# Visualization: Scatter Plot for a Pair of Variables
# Replace 'variable1' and 'variable2' with actual column names
plt.figure(figsize=(10, 6))
sns.scatterplot(x=df['variable1'], y=df['variable2'])
plt.title('Scatter Plot of Variable1 vs Variable2')
plt.xlabel('Variable1')
plt.ylabel('Variable2')
plt.show()