In [4]:
"""
COMPLETE NATURAL RESOURCE ANALYSIS
All code in one script
Output: /Users/leoss/Desktop/Portfolio/Website-/Capstone-Proj/individual_plots
"""

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import os

# ============================================================================
# CONFIGURATION
# ============================================================================

# Locations of the two input CSVs and of the folder that receives every
# HTML/CSV file this script writes.
input_file = "/Users/leoss/Desktop/GitHub/Capstone/MASTER/Master.csv"
production_file = "/Users/leoss/Desktop/GitHub/Capstone/rawdata/NR_final_LEO.csv"
output_dir = "/Users/leoss/Desktop/Portfolio/Website-/Capstone-Proj/individual_plots"

# Create the output folder up front; an already existing folder is accepted.
os.makedirs(output_dir, exist_ok=True)

# Three-line start-up banner (rule, title, rule).
print("=" * 70, "NATURAL RESOURCE ANALYSIS - COMPLETE", "=" * 70, sep="\n")

# ============================================================================
# 1. LOAD AND PREPARE PRODUCTION DATA
# ============================================================================

print("\n1. Loading production data...")

# Production figures first, then the master indicator table.
df_prod = pd.read_csv(production_file)
df_master = pd.read_csv(input_file)

# Country spellings used in the production file -> spellings they are
# renamed to before the production table is merged with the master table
# on country name and year.
country_fixes = {
    'Brunei': 'Brunei Darussalam',
    'DR Congo': 'Congo, Dem. Rep.',
    'Democratic Republic of Congo': 'Congo, Dem. Rep.',
    'Congo': 'Congo, Rep.',
    'Republic of Congo': 'Congo, Rep.',
    'Egypt': 'Egypt, Arab Rep.',
    'Iran': 'Iran, Islamic Rep.',
    'South Korea': 'Korea, Rep.',
    'Kyrgyzstan': 'Kyrgyz Republic',
    'Laos': 'Lao PDR',
    'Slovakia': 'Slovak Republic',
    'Turkey': 'Turkiye',
    'Trinidad & Tobago': 'Trinidad and Tobago',
    'Venezuela': 'Venezuela, RB',
    'Vietnam': 'Viet Nam',
    'Yemen': 'Yemen, Rep.',
    'US': 'United States',
}

# Names without an entry in the mapping are left as they are.
renamed_countries = df_prod['Country'].replace(country_fixes)
df_prod['Country'] = renamed_countries

# Categorize resources
def categorize_resource(resource):
    """Map a resource name onto one of four reporting categories.

    'Oil', 'Natural Gas' and 'Coal' are returned as their own category;
    every other value is reported as 'Metals'.
    """
    for category in ('Oil', 'Natural Gas', 'Coal'):
        if resource == category:
            return category
    return 'Metals'

# Keep only the rows whose metric is 'Production' and tag each one with its
# resource category.
is_production = df_prod['Metric'] == 'Production'
prod_data = df_prod.loc[is_production].copy()
prod_data['Resource_Category'] = prod_data['Resource'].apply(categorize_resource)

# Sum the values per country / year / category ...
prod_agg = prod_data.groupby(
    ['Country', 'Year', 'Resource_Category'], as_index=False
)['Value'].sum()

# ... then spread the categories into columns (0 where a combination is
# missing) and add a 'Total' column summing all category columns.
prod_wide = prod_agg.pivot_table(
    index=['Country', 'Year'],
    columns='Resource_Category',
    values='Value',
    fill_value=0,
).reset_index()
prod_wide['Total'] = prod_wide.drop(columns=['Country', 'Year']).sum(axis=1)

# Columns taken from the master table for the merge.
master_columns = [
    'Country Name', 'Year', 'Country Code', 'Population',
    'GDP per capita (constant prices, PPP)', 'Economic Complexity Index',
    'Human capital index', 'Manufacturing',
]
master_data = df_master[master_columns].copy()

# Inner join: only country/year pairs present in both tables survive.
map_data = pd.merge(
    prod_wide,
    master_data,
    how='inner',
    left_on=['Country', 'Year'],
    right_on=['Country Name', 'Year'],
)

# Total GDP = GDP per capita x population.
map_data['GDP_total'] = (
    map_data['GDP per capita (constant prices, PPP)'] * map_data['Population']
)

# For each resource column that exists, add a per-capita variant and a
# percent-of-total-GDP variant.
for res in ['Total', 'Oil', 'Natural Gas', 'Coal', 'Metals']:
    if res not in map_data.columns:
        continue
    map_data[f'{res}_Per_Capita'] = map_data[res] / map_data['Population']
    map_data[f'{res}_GDP_Norm'] = (map_data[res] / map_data['GDP_total']) * 100

n_countries = map_data['Country Code'].nunique()
n_years = map_data['Year'].nunique()
print(f"   Data: {n_countries} countries, {n_years} years")

# ============================================================================
# 2. CREATE PRODUCTION MAP WITH TOGGLES
# ============================================================================

"""
FIXED: Production Map - Slider updates BOTH data AND hover
"""

print("\nCreating production map with working slider AND hover...")

# One choropleth trace is created per (resource, normalization) pair, in
# this nesting order, so the trace index is resource_idx * 3 + norm_idx.
MAP_RESOURCES = ['Total', 'Oil', 'Natural Gas', 'Coal', 'Metals']
MAP_NORMALIZATIONS = [
    # (column suffix, hover format key, colorbar title)
    ('', 'absolute', 'USD'),
    ('_Per_Capita', 'per_capita', 'USD/person'),
    ('_GDP_Norm', 'gdp_norm', '% GDP'),
]


def _format_hover(values, norm_type):
    """Return one display string per value for the given normalization.

    'absolute' values are abbreviated to $B / $M, 'per_capita' values to $K,
    and anything else is rendered as a percentage with two decimals.
    """
    if norm_type == 'absolute':
        return [
            f"${v/1e9:.2f}B" if v >= 1e9
            else f"${v/1e6:.1f}M" if v >= 1e6
            else f"${v:,.0f}"
            for v in values
        ]
    if norm_type == 'per_capita':
        return [f"${v/1e3:.1f}K" if v >= 1e3 else f"${v:.0f}" for v in values]
    return [f"{v:.2f}%" for v in values]


def _trace_payloads(frame):
    """Yield (resource, units, z, hover) for every trace, in trace order.

    ``z`` is the relevant column of *frame* with missing values set to 0 and
    ``hover`` the matching pre-formatted hover strings.
    """
    for resource in MAP_RESOURCES:
        for suffix, norm_type, units in MAP_NORMALIZATIONS:
            z = frame[f'{resource}{suffix}'].fillna(0)
            yield resource, units, z, _format_hover(z, norm_type)


available_years = sorted(map_data['Year'].unique())

# The map opens on 2019; if the data has no 2019 rows, open on the latest
# year instead of rendering an empty map.
initial_year = 2019
if available_years and initial_year not in available_years:
    initial_year = available_years[-1]
initial_data = map_data[map_data['Year'] == initial_year]

# All traces start hidden; only the first one (Total, absolute) is shown.
traces = [
    go.Choropleth(
        locations=initial_data['Country Code'],
        z=z,
        text=initial_data['Country Name'],
        customdata=hover,
        colorscale='YlOrRd',
        marker=dict(line=dict(color='#999999', width=0.5)),
        colorbar=dict(title=units, len=0.7),
        hovertemplate=f'<b>%{{text}}</b><br>{resource}: %{{customdata}}<extra></extra>',
        visible=False
    )
    for resource, units, z, hover in _trace_payloads(initial_data)
]
traces[0].visible = True
fig = go.Figure(data=traces)

# Slider steps: each step restyles z, locations, text AND customdata of
# every trace, so the hover text follows the selected year too.
n_traces = len(traces)
slider_steps = []
for year in available_years:
    year_data = map_data[map_data['Year'] == year]
    payloads = list(_trace_payloads(year_data))
    codes = year_data['Country Code'].tolist()
    names = year_data['Country Name'].tolist()

    slider_steps.append(dict(
        method="restyle",
        args=[{
            "z": [z.tolist() for _, _, z, _ in payloads],
            "locations": [codes] * n_traces,
            "text": [names] * n_traces,
            "customdata": [hover for _, _, _, hover in payloads],
        }],
        label=str(year)
    ))

def get_vis(res_idx, norm_idx):
    """Return a 15-element visibility mask with exactly one True entry.

    The True entry sits at position ``res_idx * 3 + norm_idx``, i.e. the
    slot for resource ``res_idx`` and normalization ``norm_idx`` when there
    are three normalizations per resource.
    """
    mask = [False for _ in range(15)]
    slot = 3 * res_idx + norm_idx
    mask[slot] = True
    return mask

# A single dropdown lists every (resource, normalization) view, in the same
# order as the traces (index = resource * 3 + normalization).
# Two independent dropdowns cannot share state in Plotly: a separate
# normalization menu can only ever point at one fixed resource, so choosing
# e.g. "Oil" and then "Per Capita" would show Total per capita and most
# traces could never be displayed. One combined menu keeps the label and
# the visible trace in agreement and makes all 15 views reachable.
resource_labels = ["Total", "Oil", "Natural Gas", "Coal", "Metals"]
normalization_labels = ["Absolute", "Per Capita", "% of GDP"]
view_buttons = [
    dict(
        label=f"{res_label} - {norm_label}",
        method="update",
        args=[{"visible": get_vis(res_idx, norm_idx)}],
    )
    for res_idx, res_label in enumerate(resource_labels)
    for norm_idx, norm_label in enumerate(normalization_labels)
]

# The map is drawn from 2019 data when the figure is created, so the slider
# handle starts on the 2019 step; if there is no such step it starts on the
# last one.
initial_step = next(
    (idx for idx, step in enumerate(slider_steps) if step['label'] == '2019'),
    len(slider_steps) - 1,
)

fig.update_layout(
    updatemenus=[
        dict(
            buttons=view_buttons,
            # active=0 matches the one trace that is visible initially.
            direction="down", active=0, showactive=True,
            x=0.01, y=1.12, xanchor="left", yanchor="top",
            bgcolor="white", bordercolor="#002A54", borderwidth=2, font=dict(size=14)
        ),
    ],
    sliders=[{
        'active': initial_step,
        'yanchor': 'top', 'xanchor': 'left',
        'currentvalue': {'prefix': 'Year: ', 'visible': True, 'xanchor': 'center',
                        'font': {'size': 18, 'color': '#002A54'}},
        'pad': {'b': 10, 't': 50}, 'len': 0.9, 'x': 0.05, 'y': 0,
        'steps': slider_steps
    }],
    title={'text': "Natural Resource Production", 'x': 0.5, 'font': {'size': 22}},
    geo=dict(showframe=False, showcoastlines=True, coastlinecolor='#aaa',
             projection_type='natural earth', bgcolor='#e3f2fd', landcolor='#fafafa',
             countrycolor='#999', countrywidth=0.5),
    height=700, margin={"r":50,"t":150,"l":50,"b":120}
)

# Write the interactive map as a standalone HTML file.
fig.write_html(os.path.join(output_dir, 'map_production_final.html'))
print("✓ Map with working slider AND hover saved!")
print(f"  open '{os.path.join(output_dir, 'map_production_final.html')}'")

# ============================================================================
# 3. CLUSTERING ANALYSIS
# ============================================================================

print("\n3. Clustering analysis...")

# Cluster on a single year's cross-section of countries.
cluster_year = 2019
cluster_data = map_data[map_data['Year'] == cluster_year].copy()

# Resource output as a share of GDP plus two development indicators.
feature_cols = [
    'Metals_GDP_Norm', 'Oil_GDP_Norm', 'Natural Gas_GDP_Norm', 'Coal_GDP_Norm',
    'Economic Complexity Index', 'Human capital index'
]

# Drop countries with any missing feature. The explicit .copy() makes
# cluster_subset an independent frame, so the column assignments below do
# not trigger pandas' SettingWithCopyWarning.
cluster_subset = (
    cluster_data[['Country Code', 'Country Name'] + feature_cols]
    .dropna()
    .copy()
)

# Standardise the features, then project them onto two principal components.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(cluster_subset[feature_cols])

pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# NOTE: k-means is fitted on the 2-D PCA projection, not on the full
# standardised feature matrix. Fixed random_state keeps labels reproducible.
kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
cluster_subset['Cluster'] = kmeans.fit_predict(X_pca)
cluster_subset['PC1'] = X_pca[:, 0]
cluster_subset['PC2'] = X_pca[:, 1]

print(f"   Clusters: {cluster_subset['Cluster'].value_counts().sort_index().to_dict()}")

# Save the per-country cluster labels and PCA coordinates.
cluster_subset.to_csv(os.path.join(output_dir, 'cluster_assignments.csv'), index=False)

# ============================================================================
# 4. CLUSTER MAP
# ============================================================================

print("\n4. Creating cluster map...")

# Choropleth coloured by cluster id; the colorbar shows one tick per id 0-4.
cluster_ticks = [0, 1, 2, 3, 4]
cluster_trace = go.Choropleth(
    locations=cluster_subset['Country Code'],
    z=cluster_subset['Cluster'],
    text=cluster_subset['Country Name'],
    colorscale='Viridis',
    marker={'line': {'color': '#999999', 'width': 0.5}},
    colorbar={
        'title': "Cluster",
        'tickvals': cluster_ticks,
        'ticktext': [str(tick) for tick in cluster_ticks],
        'len': 0.7,
    },
    hovertemplate='<b>%{text}</b><br>Cluster: %{z}<extra></extra>',
)
fig_cluster = go.Figure(data=cluster_trace)

# Centred title, frameless natural-earth projection, fixed figure size.
cluster_geo = dict(
    showframe=False,
    showcoastlines=True,
    coastlinecolor='#aaa',
    projection_type='natural earth',
    bgcolor='#e3f2fd',
)
fig_cluster.update_layout(
    title=dict(text="Country Clustering by Resource Production", x=0.5, font=dict(size=20)),
    geo=cluster_geo,
    height=700,
    margin=dict(r=50, t=100, l=50, b=50),
)

cluster_map_path = os.path.join(output_dir, 'map_clusters.html')
fig_cluster.write_html(cluster_map_path)
print("   ✓ Cluster map saved")

# ============================================================================
# 5. PCA LOADINGS (PLOTLY)
# ============================================================================

print("\n5. Creating PCA loadings plots...")

# Rows are the input features, columns the two fitted components.
loadings = pd.DataFrame(pca.components_.T, columns=['PC1', 'PC2'], index=feature_cols)


def _loadings_bar_figure(sorted_loadings, component, variance_share):
    """Build a horizontal bar chart of one component's loadings.

    Bars are green for loadings above zero and red otherwise, each labelled
    with its value to three decimals; the title reports the component's
    explained-variance share.
    """
    bar_colors = ['#2ecc71' if w > 0 else '#e74c3c' for w in sorted_loadings]
    bar_labels = [f"{w:.3f}" for w in sorted_loadings.values]

    figure = go.Figure()
    figure.add_trace(go.Bar(
        y=sorted_loadings.index,
        x=sorted_loadings.values,
        orientation='h',
        marker_color=bar_colors,
        text=bar_labels,
        textposition='outside',
    ))
    figure.update_layout(
        title=f"{component} Loadings ({variance_share:.1%} variance)",
        xaxis_title="Loading",
        height=500,
        showlegend=False,
        xaxis=dict(zeroline=True, zerolinewidth=2, zerolinecolor='black'),
        template='plotly_white',
    )
    return figure


# PC1: loadings sorted ascending, plotted and written to HTML.
loadings_pc1 = loadings['PC1'].sort_values()
fig_pc1 = _loadings_bar_figure(loadings_pc1, 'PC1', pca.explained_variance_ratio_[0])
fig_pc1.write_html(os.path.join(output_dir, 'pca_loadings_pc1.html'))

# PC2: same treatment.
loadings_pc2 = loadings['PC2'].sort_values()
fig_pc2 = _loadings_bar_figure(loadings_pc2, 'PC2', pca.explained_variance_ratio_[1])
fig_pc2.write_html(os.path.join(output_dir, 'pca_loadings_pc2.html'))

# Raw loadings table, feature names kept as the index column.
loadings.to_csv(os.path.join(output_dir, 'pca_loadings.csv'))
print("   ✓ PCA loadings saved")

# ============================================================================
# 6. SCATTER PLOT PC1 vs PC2
# ============================================================================

print("\n6. Creating scatter plot...")

# Axis captions shown in place of the raw column names.
axis_labels = {
    'PC1': 'PC1 (Economic Development)',
    'PC2': 'PC2 (Hydrocarbon Production)',
}

# One point per country at its PCA coordinates, coloured by cluster id,
# with the country name added to the hover box.
fig_scatter = px.scatter(
    cluster_subset,
    x='PC1',
    y='PC2',
    color='Cluster',
    hover_data=['Country Name'],
    title='PCA Scatter Plot - Countries by Resource Production Patterns',
    color_continuous_scale='Viridis',
    labels=axis_labels,
)

# White-outlined markers on the plain white template.
marker_style = {'size': 10, 'line': {'width': 1, 'color': 'white'}}
fig_scatter.update_traces(marker=marker_style)
fig_scatter.update_layout(template='plotly_white', height=600)

scatter_path = os.path.join(output_dir, 'scatter_pca.html')
fig_scatter.write_html(scatter_path)
print("   ✓ Scatter plot saved")

# ============================================================================
# SUMMARY
# ============================================================================

# Closing summary: where the files went and what was generated.
print("\n" + "="*70)
print("COMPLETE!")
print("="*70)
print(f"\nAll files saved to: {output_dir}")
print("\nGenerated files:")

# (file name, description) in the order the files are produced. The map is
# listed under the name it is actually written with
# (map_production_final.html).
generated_files = [
    ("map_production_final.html", "Production map with toggles"),
    ("map_clusters.html", "Cluster choropleth"),
    ("pca_loadings_pc1.html", "PC1 loadings"),
    ("pca_loadings_pc2.html", "PC2 loadings"),
    ("scatter_pca.html", "PCA scatter plot"),
    ("cluster_assignments.csv", "Cluster data"),
    ("pca_loadings.csv", "Loadings data"),
]
for number, (filename, description) in enumerate(generated_files, start=1):
    print(f"  {number}. {filename} - {description}")
print("="*70)

NATURAL RESOURCE ANALYSIS - COMPLETE

1. Loading production data...
   Data: 126 countries, 25 years

Creating production map with working slider AND hover...
✓ Map with working slider AND hover saved!
  open '/Users/leoss/Desktop/Portfolio/Website-/Capstone-Proj/individual_plots/map_production_final.html'

3. Clustering analysis...
   Clusters: {0: 38, 1: 44, 2: 13, 3: 24, 4: 5}

4. Creating cluster map...
   ✓ Cluster map saved

5. Creating PCA loadings plots...
   ✓ PCA loadings saved

6. Creating scatter plot...
   ✓ Scatter plot saved

COMPLETE!

All files saved to: /Users/leoss/Desktop/Portfolio/Website-/Capstone-Proj/individual_plots

Generated files:
  1. map_production.html - Production map with toggles
  2. map_clusters.html - Cluster choropleth
  3. pca_loadings_pc1.html - PC1 loadings
  4. pca_loadings_pc2.html - PC2 loadings
  5. scatter_pca.html - PCA scatter plot
  6. cluster_assignments.csv - Cluster data
  7. pca_loadings.csv - Loadings data
