# Island Economic Analysis

Comprehensive analysis of 100 years of island data with predictions for the next 5 years.

In [56]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Set visualization style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 8)

# Load the integrated dataset
df = pd.read_csv('integrated_population_data_year100.csv')

print("Dataset loaded successfully!")
print("="*80)
print(f"Total records: {len(df):,}")
print(f"Columns: {list(df.columns)}")
print("\nFirst few rows:")
print(df.head(10))
print("\nDataset info:")
print(df.info())
print("\nBasic statistics:")
print(df.describe())

Dataset loaded successfully!
Total records: 50,994
Columns: ['year', 'sex', 'profession', 'income', 'net_worth', 'age', 'happiness']

First few rows:
   year sex     profession   income  net_worth  age  happiness
0     0   F  civil servant  5131.08    10000.0   40      100.0
1     0   M          child     0.00    10000.0    1      100.0
2     0   M     unemployed   124.10    10000.0   25      100.0
3     0   M         farmer  1868.39    10000.0   36      100.0
4     0   F          child     0.00    10000.0    1      100.0
5     0   F  civil servant  5045.29    10000.0   41      100.0
6     0   F          child     0.00    10000.0    8      100.0
7     0   M      homemaker   149.64    10000.0   30      100.0
8     0   M          child     0.00    10000.0    3      100.0
9     0   M         fisher  1381.20    10000.0   24      100.0

Dataset info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50994 entries, 0 to 50993
Data columns (total 7 columns):
 #   Column      Non-Null Count  D

## 1. Key Statistics & Insights

In [57]:
# Calculate GDP data from population
yearly_stats = df.groupby('year').agg({
    'income': ['mean', 'sum', 'median'],
    'net_worth': ['mean', 'sum'],
    'age': ['mean', 'median'],
    'happiness': 'mean',
    'year': 'count'
}).reset_index()

yearly_stats.columns = ['year', 'avg_income', 'total_income', 'median_income', 
                        'avg_networth', 'total_networth', 'avg_age', 'median_age', 
                        'avg_happiness', 'population']

# Calculate GDP (using total income as proxy for GDP)
yearly_stats['gdp'] = yearly_stats['total_income']

print("="*80)
print("ISLAND ECONOMIC OVERVIEW (100 Years)")
print("="*80)
print(f"\nüìä Population Statistics:")
print(f"   Average population per year: {yearly_stats['population'].mean():.0f}")
print(f"   Population range: {yearly_stats['population'].min():.0f} - {yearly_stats['population'].max():.0f}")

print(f"\nüí∞ Income Statistics:")
print(f"   Overall average income: ${df['income'].mean():,.2f}")
print(f"   Highest individual income: ${df['income'].max():,.2f}")
print(f"   Average yearly total income: ${yearly_stats['total_income'].mean():,.2f}")

print(f"\nüè¶ Net Worth Statistics:")
print(f"   Average net worth: ${df['net_worth'].mean():,.2f}")
print(f"   Highest net worth: ${df['net_worth'].max():,.2f}")
print(f"   Lowest net worth: ${df['net_worth'].min():,.2f}")

print(f"\nüë• Demographics:")
print(f"   Average age: {df['age'].mean():.1f} years")
print(f"   Age range: {df['age'].min():.0f} - {df['age'].max():.0f} years")

print(f"\nüòä Happiness:")
print(f"   Average happiness: {df['happiness'].mean():.2f}")
print(f"   Happiness range: {df['happiness'].min():.2f} - {df['happiness'].max():.2f}")


ISLAND ECONOMIC OVERVIEW (100 Years)

üìä Population Statistics:
   Average population per year: 505
   Population range: 496 - 515

üí∞ Income Statistics:
   Overall average income: $2,121.05
   Highest individual income: $12,543.60
   Average yearly total income: $1,070,901.60

üè¶ Net Worth Statistics:
   Average net worth: $24,298.59
   Highest net worth: $176,445.87
   Lowest net worth: $-52,795.51

üë• Demographics:
   Average age: 36.2 years
   Age range: 0 - 82 years

üòä Happiness:
   Average happiness: 100.09
   Happiness range: 65.00 - 122.61


In [58]:

print(f"\nüìà GDP Statistics:")
print(f"   Average GDP: ${yearly_stats['gdp'].mean():,.2f}")
print(f"   GDP growth over 100 years: {(yearly_stats['gdp'].iloc[-1] / yearly_stats['gdp'].iloc[0] - 1) * 100:.1f}%")
print(f"   Year 0 GDP: ${yearly_stats['gdp'].iloc[0]:,.2f}")
print(f"   Year 99 GDP: ${yearly_stats['gdp'].iloc[-1]:,.2f}")


üìà GDP Statistics:
   Average GDP: $1,070,901.60
   GDP growth over 100 years: 27.2%
   Year 0 GDP: $947,610.99
   Year 99 GDP: $1,205,293.20


## 2. Highest Income Professions Analysis

In [64]:
# Analyze professions (excluding children)
working_pop = df[df['profession'] != 'child'].copy()

# Overall profession statistics (across all 100 years)
profession_stats = working_pop.groupby('profession').agg({
    'income': ['mean', 'median', 'max', 'min', 'count'],
    'net_worth': 'mean',
    'age': 'mean',
    'happiness': 'mean'
}).reset_index()

profession_stats.columns = ['profession', 'avg_income', 'median_income', 'max_income', 
                            'min_income', 'count', 'avg_networth', 'avg_age', 'avg_happiness']
profession_stats = profession_stats.sort_values('avg_income', ascending=False)

print("="*80)
print("TOP 10 PROFESSIONS BY AVERAGE INCOME (100-Year Average)")
print("="*80)
print(profession_stats[['profession', 'avg_income', 'median_income', 'count']].head(10).to_string(index=False))

# Analyze by year: income contribution per year
yearly_profession = working_pop.groupby(['year', 'profession']).agg({
    'income': ['mean', 'count', 'sum']
}).reset_index()
yearly_profession.columns = ['year', 'profession', 'avg_income', 'worker_count', 'total_income']

# For each year, find top professions by total income contribution
print("\n" + "="*80)
print("TOP PROFESSIONS BY INCOME CONTRIBUTION PER YEAR")
print("="*80)
print("Showing sample years: 0, 25, 50, 75, 99")
print("-"*80)

for sample_year in [0, 25, 50, 75, 99]:
    year_data = yearly_profession[yearly_profession['year'] == sample_year].sort_values('total_income', ascending=False)
    print(f"\nYear {sample_year} (GDP: ${yearly_stats[yearly_stats['year']==sample_year]['gdp'].values[0]:,.2f}):")
    print(f"{'Profession':<20} {'Avg Income':<15} {'Workers':<10} {'Total Income':<20}")
    print("-"*80)
    for idx, row in year_data.head(5).iterrows():
        print(f"{row['profession']:<20} ${row['avg_income']:>12,.2f}  {row['worker_count']:>7.0f}  ${row['total_income']:>16,.2f}")
    
    # Verify GDP calculation
    year_total = year_data['total_income'].sum()
    print(f"{'TOTAL (all professions)':<20} {'':<15} {year_data['worker_count'].sum():>7.0f}  ${year_total:>16,.2f}")

# Calculate average yearly contribution by profession
avg_yearly_contribution = yearly_profession.groupby('profession').agg({
    'total_income': 'mean',
    'worker_count': 'mean',
    'avg_income': 'mean'
}).reset_index()
avg_yearly_contribution.columns = ['profession', 'avg_yearly_contribution', 'avg_workers_per_year', 'avg_income']
avg_yearly_contribution = avg_yearly_contribution.sort_values('avg_yearly_contribution', ascending=False)

print("\n" + "="*80)
print("TOP 10 PROFESSIONS BY AVERAGE YEARLY INCOME CONTRIBUTION")
print("="*80)
print(avg_yearly_contribution[['profession', 'avg_income', 'avg_workers_per_year', 'avg_yearly_contribution']].head(10).to_string(index=False))




TOP 10 PROFESSIONS BY AVERAGE INCOME (100-Year Average)
      profession  avg_income  median_income  count
   civil servant 6182.446960       6012.330   3313
service provider 6062.442177       5833.130   3867
       craftsman 4800.816679       4580.475   5152
          fisher 3136.927572       2848.625   6264
          farmer 1840.492776       1771.340   6722
         retired 1606.043514       1271.650   4847
      unemployed   12.342250         63.410   2493
       homemaker  -53.608001         25.770   6257

TOP PROFESSIONS BY INCOME CONTRIBUTION PER YEAR
Showing sample years: 0, 25, 50, 75, 99
--------------------------------------------------------------------------------

Year 0 (GDP: $947,610.99):
Profession           Avg Income      Workers    Total Income        
--------------------------------------------------------------------------------
civil servant        $    5,357.96       43  $      230,392.30
service provider     $    5,215.76       41  $      213,846.07
craftsman  

In [65]:
print("\n" + "="*80)
print("HIGHEST EARNING INDIVIDUAL")
print("="*80)
highest_earner = working_pop.loc[working_pop['income'].idxmax()]
print(f"Profession: {highest_earner['profession']}")
print(f"Income: ${highest_earner['income']:,.2f}")
print(f"Net Worth: ${highest_earner['net_worth']:,.2f}")
print(f"Age: {highest_earner['age']:.0f} years")
print(f"Year: {highest_earner['year']}")
print(f"Sex: {highest_earner['sex']}")
print(f"Happiness: {highest_earner['happiness']:.2f}")

print("\n" + "="*80)
print("PROFESSION COMPARISON - Full Statistics")
print("="*80)
print(avg_yearly_contribution[['profession', 'avg_income', 'avg_workers_per_year', 'avg_yearly_contribution']].head(10).to_string(index=False))


HIGHEST EARNING INDIVIDUAL
Profession: service provider
Income: $12,543.60
Net Worth: $136,315.90
Age: 69 years
Year: 37
Sex: F
Happiness: 102.99

PROFESSION COMPARISON - Full Statistics
      profession  avg_income  avg_workers_per_year  avg_yearly_contribution
       craftsman 4794.816439             51.009901            244889.183465
service provider 6057.800880             38.287129            232113.503960
   civil servant 6194.372679             32.801980            202796.502772
          fisher 3122.251893             62.019802            194551.626832
          farmer 1838.298031             66.554455            122492.994455
         retired 1632.594033             48.470000             77844.929100
      unemployed   -8.765745             24.683168               304.645842
       homemaker  -59.990987             61.950495             -3321.042178


In [69]:
# Analysis of last 10 years (Year 91-100)
print("\n" + "="*80)
print("LAST 10 YEARS ANALYSIS (Year 91-100)")
print("="*80)

# Filter data for years 91-100
last_10_years = yearly_profession[yearly_profession['year'].between(91, 100)]

# Group by profession for these years
last_10_prof_stats = last_10_years.groupby('profession').agg({
    'avg_income': 'mean',
    'worker_count': 'mean',
    'total_income': 'mean'
}).reset_index()
last_10_prof_stats.columns = ['profession', 'avg_income', 'avg_workers', 'avg_contribution']

# Sort by average income
last_10_by_income = last_10_prof_stats.sort_values('avg_income', ascending=False)
print("\nTop 5 Professions by Average Income (Year 91-100):")
print(f"{'Profession':<20} {'Avg Income':<15} {'Avg Workers':<15}")
print("-"*80)
for idx, row in last_10_by_income.head(5).iterrows():
    print(f"{row['profession']:<20} ${row['avg_income']:>12,.2f}  {row['avg_workers']:>12.1f}")

# Sort by income contribution (avg_income √ó avg_workers)
last_10_by_contribution = last_10_prof_stats.sort_values('avg_contribution', ascending=False)
print("\nTop 5 Professions by Income Contribution (Year 91-100):")
print(f"{'Profession':<20} {'Avg Income':<15} {'Avg Workers':<15} {'Contribution':<20}")
print("-"*80)
for idx, row in last_10_by_contribution.head(5).iterrows():
    print(f"{row['profession']:<20} ${row['avg_income']:>12,.2f}  {row['avg_workers']:>12.1f}  ${row['avg_contribution']:>16,.2f}")



LAST 10 YEARS ANALYSIS (Year 91-100)

Top 5 Professions by Average Income (Year 91-100):
Profession           Avg Income      Avg Workers    
--------------------------------------------------------------------------------
civil servant        $    6,660.48          27.4
service provider     $    6,568.27          34.8
craftsman            $    5,525.78          49.1
fisher               $    3,501.42          74.0
farmer               $    2,093.88          68.9

Top 5 Professions by Income Contribution (Year 91-100):
Profession           Avg Income      Avg Workers     Contribution        
--------------------------------------------------------------------------------
craftsman            $    5,525.78          49.1  $      270,990.34
fisher               $    3,501.42          74.0  $      258,530.46
service provider     $    6,568.27          34.8  $      228,330.62
civil servant        $    6,660.48          27.4  $      182,974.29
farmer               $    2,093.88          68.

In [71]:
# Year-by-year breakdown for Craftsman (Year 91-100)
print("="*80)
print("CRAFTSMAN - YEAR-BY-YEAR BREAKDOWN (Year 91-100)")
print("="*80)
craftsman_years = last_10_years[last_10_years['profession'] == 'craftsman'].sort_values('year')
print(f"{'Year':<8} {'Avg Income':<15} {'Workers':<10} {'Total Contribution':<20}")
print("-"*80)
for idx, row in craftsman_years.iterrows():
    print(f"{int(row['year']):<8} ${row['avg_income']:>12,.2f}  {row['worker_count']:>7.0f}  ${row['total_income']:>16,.2f}")
print("-"*80)
print(f"{'AVERAGE':<8} ${craftsman_years['avg_income'].mean():>12,.2f}  {craftsman_years['worker_count'].mean():>7.1f}  ${craftsman_years['total_income'].mean():>16,.2f}")

CRAFTSMAN - YEAR-BY-YEAR BREAKDOWN (Year 91-100)
Year     Avg Income      Workers    Total Contribution  
--------------------------------------------------------------------------------
91       $    5,371.74       52  $      279,330.35
92       $    5,461.18       51  $      278,520.25
93       $    5,272.07       51  $      268,875.75
94       $    5,385.80       50  $      269,290.22
95       $    5,425.16       51  $      276,683.10
96       $    5,526.89       50  $      276,344.52
97       $    5,628.78       49  $      275,810.35
98       $    5,748.32       48  $      275,919.19
99       $    5,766.68       46  $      265,267.24
100      $    5,671.22       43  $      243,862.47
--------------------------------------------------------------------------------
AVERAGE  $    5,525.78     49.1  $      270,990.34


In [72]:
# Year-by-year breakdown for Fisher (Year 91-100)
print("="*80)
print("FISHER - YEAR-BY-YEAR BREAKDOWN (Year 91-100)")
print("="*80)
fisher_years = last_10_years[last_10_years['profession'] == 'fisher'].sort_values('year')
print(f"{'Year':<8} {'Avg Income':<15} {'Workers':<10} {'Total Contribution':<20}")
print("-"*80)
for idx, row in fisher_years.iterrows():
    print(f"{int(row['year']):<8} ${row['avg_income']:>12,.2f}  {row['worker_count']:>7.0f}  ${row['total_income']:>16,.2f}")
print("-"*80)
print(f"{'AVERAGE':<8} ${fisher_years['avg_income'].mean():>12,.2f}  {fisher_years['worker_count'].mean():>7.1f}  ${fisher_years['total_income'].mean():>16,.2f}")

FISHER - YEAR-BY-YEAR BREAKDOWN (Year 91-100)
Year     Avg Income      Workers    Total Contribution  
--------------------------------------------------------------------------------
91       $    4,583.16       74  $      339,154.19
92       $    2,677.71       77  $      206,183.42
93       $    2,687.62       77  $      206,946.95
94       $    4,629.16       78  $      361,074.21
95       $    2,780.97       77  $      214,134.94
96       $    2,797.54       76  $      212,613.25
97       $    4,740.48       74  $      350,795.71
98       $    2,764.10       72  $      199,015.44
99       $    2,706.51       68  $      184,042.98
100      $    4,646.92       67  $      311,343.48
--------------------------------------------------------------------------------
AVERAGE  $    3,501.42     74.0  $      258,530.46


In [73]:
# Year-by-year breakdown for Service Provider (Year 91-100)
print("="*80)
print("SERVICE PROVIDER - YEAR-BY-YEAR BREAKDOWN (Year 91-100)")
print("="*80)
service_years = last_10_years[last_10_years['profession'] == 'service provider'].sort_values('year')
print(f"{'Year':<8} {'Avg Income':<15} {'Workers':<10} {'Total Contribution':<20}")
print("-"*80)
for idx, row in service_years.iterrows():
    print(f"{int(row['year']):<8} ${row['avg_income']:>12,.2f}  {row['worker_count']:>7.0f}  ${row['total_income']:>16,.2f}")
print("-"*80)
print(f"{'AVERAGE':<8} ${service_years['avg_income'].mean():>12,.2f}  {service_years['worker_count'].mean():>7.1f}  ${service_years['total_income'].mean():>16,.2f}")

SERVICE PROVIDER - YEAR-BY-YEAR BREAKDOWN (Year 91-100)
Year     Avg Income      Workers    Total Contribution  
--------------------------------------------------------------------------------
91       $    6,395.03       38  $      243,011.22
92       $    6,371.08       37  $      235,729.96
93       $    6,257.95       35  $      219,028.33
94       $    6,469.46       35  $      226,431.14
95       $    6,401.54       35  $      224,054.06
96       $    6,590.77       35  $      230,677.10
97       $    6,691.37       34  $      227,506.55
98       $    6,882.41       34  $      234,001.90
99       $    6,926.82       33  $      228,585.17
100      $    6,696.28       32  $      214,280.82
--------------------------------------------------------------------------------
AVERAGE  $    6,568.27     34.8  $      228,330.62


In [75]:
# Age analysis for last 10 years (Year 91-100)
print("="*80)
print("AGE ANALYSIS - LAST 10 YEARS (Year 91-100)")
print("="*80)

# Filter working population for years 91-100
last_10_working = working_pop[working_pop['year'].between(91, 100)]

# Calculate age statistics by profession
age_by_profession = last_10_working.groupby('profession').agg({
    'age': ['mean', 'median', 'min', 'max', 'count']
}).reset_index()
age_by_profession.columns = ['profession', 'avg_age', 'median_age', 'min_age', 'max_age', 'count']

# Sort by average age
age_by_avg = age_by_profession.sort_values('avg_age', ascending=False)
print("\nTop 5 Professions by Average Age (Year 91-100):")
print(f"{'Profession':<20} {'Avg Age':<12} {'Median Age':<12} {'Workers':<10}")
print("-"*80)
for idx, row in age_by_avg.head(5).iterrows():
    print(f"{row['profession']:<20} {row['avg_age']:>10.1f}  {row['median_age']:>10.1f}  {row['count']:>8.0f}")

# Sort by median age
age_by_median = age_by_profession.sort_values('median_age', ascending=False)
print("\nTop 5 Professions by Median Age (Year 91-100):")
print(f"{'Profession':<20} {'Median Age':<12} {'Avg Age':<12} {'Workers':<10}")
print("-"*80)
for idx, row in age_by_median.head(5).iterrows():
    print(f"{row['profession']:<20} {row['median_age']:>10.1f}  {row['avg_age']:>10.1f}  {row['count']:>8.0f}")

AGE ANALYSIS - LAST 10 YEARS (Year 91-100)

Top 5 Professions by Average Age (Year 91-100):
Profession           Avg Age      Median Age   Workers   
--------------------------------------------------------------------------------
retired                    68.6        68.0       569
craftsman                  44.8        47.0       491
homemaker                  44.8        47.0       554
farmer                     44.2        45.0       689
fisher                     44.2        47.0       740

Top 5 Professions by Median Age (Year 91-100):
Profession           Median Age   Avg Age      Workers   
--------------------------------------------------------------------------------
retired                    68.0        68.6       569
craftsman                  47.0        44.8       491
fisher                     47.0        44.2       740
homemaker                  47.0        44.8       554
farmer                     45.0        44.2       689


In [77]:
# Fisher age analysis - Year by year (91-100)
print("="*80)
print("FISHER - AGE ANALYSIS YEAR-BY-YEAR (Year 91-100)")
print("="*80)

# Calculate age statistics for fisher by year
fisher_age_yearly = last_10_working[last_10_working['profession'] == 'fisher'].groupby('year').agg({
    'age': ['mean', 'median', 'min', 'max', 'count']
}).reset_index()
fisher_age_yearly.columns = ['year', 'avg_age', 'median_age', 'min_age', 'max_age', 'worker_count']

print(f"{'Year':<8} {'Avg Age':<12} {'Median Age':<12} {'Min Age':<10} {'Max Age':<10} {'Workers':<10}")
print("-"*80)
for idx, row in fisher_age_yearly.iterrows():
    print(f"{int(row['year']):<8} {row['avg_age']:>10.1f}  {row['median_age']:>10.1f}  {row['min_age']:>8.0f}  {row['max_age']:>8.0f}  {row['worker_count']:>8.0f}")
print("-"*80)
print(f"{'AVERAGE':<8} {fisher_age_yearly['avg_age'].mean():>10.1f}  {fisher_age_yearly['median_age'].mean():>10.1f}  {fisher_age_yearly['min_age'].mean():>8.1f}  {fisher_age_yearly['max_age'].mean():>8.1f}  {fisher_age_yearly['worker_count'].mean():>8.1f}")

print(f"\nüìä Fisher Age Summary (Year 91-100):")
print(f"   Average age trend: {fisher_age_yearly['avg_age'].iloc[0]:.1f} ‚Üí {fisher_age_yearly['avg_age'].iloc[-1]:.1f}")
print(f"   Median age trend: {fisher_age_yearly['median_age'].iloc[0]:.1f} ‚Üí {fisher_age_yearly['median_age'].iloc[-1]:.1f}")
print(f"   Age range: {fisher_age_yearly['min_age'].min():.0f} - {fisher_age_yearly['max_age'].max():.0f} years")

FISHER - AGE ANALYSIS YEAR-BY-YEAR (Year 91-100)
Year     Avg Age      Median Age   Min Age    Max Age    Workers   
--------------------------------------------------------------------------------
91             43.2        46.0        18        63        74
92             43.2        45.0        18        64        77
93             43.5        46.0        18        64        77
94             43.7        46.5        18        64        78
95             44.4        47.0        19        64        77
96             45.2        47.5        20        64        76
97             45.0        47.5        18        64        74
98             44.9        48.0        18        64        72
99             44.0        45.5        18        64        68
100            44.7        46.0        19        64        67
--------------------------------------------------------------------------------
AVERAGE        44.2        46.5      18.4      63.9      74.0

üìä Fisher Age Summary (Year 91-100):