# 1. Import Libraries


In [928]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import folium
import json
import geopandas as gpd
import requests
import geoplot as gplt
import plotly.express as px

# 2. Import Dataframes

In [930]:
# Load the cleaned city temperature data (a CSV file) from the project folder.

cityTemp_path = '/Users/ryumugil/Downloads/Achievement 6 Analysis'

# Columns of interest in the temperature data.
# Kept for reference only: the list is not passed to read_csv, so every column is loaded.
column_list = ['Region', 'Country', 'State', 'Month', 'Day', 'Year', 'AvgTemperature']

temperature_csv_path = os.path.join(
    cityTemp_path, '02 Data', 'Original Data', 'city_temperature_cleaned.csv'
)
df_cityTemp_clean = pd.read_csv(temperature_csv_path)

# Preview the first rows
df_cityTemp_clean.head()

Unnamed: 0,Region,Country,State,City,Month,Day,Year,AvgTemperature
0,Africa,Algeria,,Algiers,1.0,1.0,1995.0,64.2
1,Africa,Algeria,,Algiers,1.0,2.0,1995.0,49.4
2,Africa,Algeria,,Algiers,1.0,3.0,1995.0,48.8
3,Africa,Algeria,,Algiers,1.0,4.0,1995.0,46.4
4,Africa,Algeria,,Algiers,1.0,5.0,1995.0,47.9


# 3. Wrangle Data

2. Find a JSON/GeoJSON file containing location data that corresponds to the location data in your student project data (or use this one if your analysis is about the US). We’ve listed a few suggestions to get you started:

Datahub—country polygons as GeoJSON;

In [None]:
# Clean the temperature data.
# Coerce to numeric FIRST so the -99 "missing" sentinel is compared as a number
# (a string "-99" would otherwise pass the filter), then drop unusable rows.
df_cityTemp_clean = df_cityTemp_clean.assign(
    AvgTemperature=pd.to_numeric(df_cityTemp_clean['AvgTemperature'], errors='coerce')
)
df_cityTemp_clean = (
    df_cityTemp_clean[df_cityTemp_clean['AvgTemperature'] != -99]
    .dropna(subset=['Country', 'AvgTemperature'])
)

# Country polygons as GeoJSON from Datahub
geojson_url = 'https://datahub.io/core/geo-countries/r/countries.geojson'  # remote source
new_country_geo_filepath = '/Users/ryumugil/Downloads/Achievement 6 Analysis/02 Data/Original Data/countries_downloaded.geojson'  # where the download is saved
manual_country_geo_filepath = '/Users/ryumugil/Downloads/Achievement 6 Analysis/02 Data/Original Data/countries.geojson'  # manually downloaded copy (fallback)

# Try to download the GeoJSON, save it and load it into `world`.
# If downloading, writing or reading fails, fall back to the manually downloaded file.
try:
    # A timeout keeps the cell from hanging forever when the server does not answer.
    response = requests.get(geojson_url, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Failed to download GeoJSON, status code: {response.status_code}")
    with open(new_country_geo_filepath, 'wb') as f:
        f.write(response.content)
    # Load the file that was just saved; previously `world` was only defined in the
    # fallback branch, so a successful download ended in a NameError further down.
    world = gpd.read_file(new_country_geo_filepath)
    print("GeoJSON downloaded and loaded successfully.")
except Exception as e:  # deliberate best-effort: any failure above triggers the local fallback
    print(f"Error downloading GeoJSON from URL: {e}")
    if os.path.exists(manual_country_geo_filepath):
        world = gpd.read_file(manual_country_geo_filepath)
        print("Loaded GeoJSON from local file.")
    else:
        raise FileNotFoundError("No local GeoJSON file found. Please download manually from https://datahub.io/core/geo-countries/r/countries.geojson.") from e

# Normalize GeoJSON country names
world['name'] = world['name'].str.strip().str.lower()

# Reduce the daily readings to one mean value per country, using the same
# normalisation as `world['name']` so the join keys can actually match.
# Aggregating first also avoids attaching a polygon to every single daily row.
country_mean_temp = (
    df_cityTemp_clean
    .assign(Country=df_cityTemp_clean['Country'].str.strip().str.lower())
    .groupby('Country', as_index=False)['AvgTemperature']
    .mean()
)

# Merge temperature data with GeoJSON data (left join keeps every country polygon)
merged_data = world.merge(country_mean_temp, left_on='name', right_on='Country', how='left')

# Check the merged dataset.
# Unmatched countries keep NaN instead of being filled with 0, so "no data" is never
# confused with a real reading; coverage is decided by whether the join found a match.
print("Merged dataset head:\n", merged_data.head())
print("Countries with temperature data:\n", merged_data.loc[merged_data['Country'].notna(), 'name'].unique())

# Basic plot to verify GeoJSON
world.plot()
plt.title('World Map from GeoJSON (Verification)')
plt.show()

In [None]:
# Load the country polygons into a separate frame for inspection.
new_country_geo_filepath = '/Users/ryumugil/Downloads/Achievement 6 Analysis/02 Data/Original Data/countries_downloaded.geojson' # new data downloaded and saved in this

# If the download step fell back to the manually saved copy, the downloaded file may
# not exist; inspect the manual copy in that case instead of failing.
geo_inspection_path = new_country_geo_filepath
if not os.path.exists(geo_inspection_path):
    geo_inspection_path = manual_country_geo_filepath

new_country_geo = gpd.read_file(geo_inspection_path)
new_country_geo.head()

In [None]:
# Inspect the column data types of the country frame loaded with gpd.read_file
new_country_geo.dtypes

In [None]:
# Count the missing values in each column of the country frame
new_country_geo.isna().sum()

In [None]:
# Drop rows whose geometry is missing OR empty.
# dropna alone only removes missing geometries; an empty geometry is a non-null object
# and needs the explicit `is_empty` test.
has_usable_geometry = world['geometry'].notna() & ~world['geometry'].is_empty
world = world[has_usable_geometry].copy()

In [None]:
# Drop rows without a country name, then standardise the two key columns:
# names are trimmed and lowercased, ISO alpha-3 codes are trimmed and uppercased.
world = (
    world
    .dropna(subset=['name'])
    .assign(**{
        'name': lambda frame: frame['name'].str.strip().str.lower(),
        'ISO3166-1-Alpha-3': lambda frame: frame['ISO3166-1-Alpha-3'].str.strip().str.upper(),
    })
)

In [None]:
# Keep only the first row for every ('name', 'ISO3166-1-Alpha-3') combination
is_repeated_country = world.duplicated(subset=['name', 'ISO3166-1-Alpha-3'], keep='first')
world = world[~is_repeated_country]


In [None]:
# Write the cleaned country polygons to a GeoJSON file.
# The path is relative, so the file is created in the current working directory.
Cleaned_new_country_geo_path = 'cleaned_countries.geojson'
world.to_file(filename=Cleaned_new_country_geo_path, driver='GeoJSON')
print(f"Cleaned GeoJSON saved as '{Cleaned_new_country_geo_path}'.")

# 4. Create Choropleth

In [None]:
# Clean and standardize country names for merging
df_cityTemp_clean = df_cityTemp_clean.assign(
    Country=df_cityTemp_clean['Country'].str.strip().str.lower()
)
world['name'] = world['name'].str.strip().str.lower()

# Collapse the daily city readings to ONE mean value per country. A choropleth needs a
# single value per polygon; merging the raw rows would create one map row per daily reading.
country_temps = df_cityTemp_clean.groupby('Country', as_index=False)['AvgTemperature'].mean()

# The readings are in degrees Fahrenheit (the data preview shows 64.2 for Algiers on
# 1 Jan 1995, which cannot be a daily mean in °C). Convert to °C so the values agree
# with the °C labels and the 0-30 colour range used below.
country_temps['AvgTemperature'] = (country_temps['AvgTemperature'] - 32) * 5 / 9

# Report temperature countries whose name has no counterpart in the GeoJSON; they
# cannot be drawn until the spellings are reconciled.
unmatched_countries = sorted(set(country_temps['Country']) - set(world['name']))
if unmatched_countries:
    print("Temperature countries with no matching GeoJSON name:", unmatched_countries)

# Merge temperature data with the GeoJSON country list. Only the name column is needed
# here because Plotly takes the shapes from the `geojson` argument.
merged_data = pd.DataFrame(world[['name']]).merge(
    country_temps, left_on='name', right_on='Country', how='left'
)

# Create the choropleth map with Plotly
fig = px.choropleth_mapbox(
    # Countries without data are left out instead of being filled with 0, so that
    # "no data" is never drawn as a real 0° reading.
    merged_data.dropna(subset=['AvgTemperature']),
    geojson=world.to_crs(epsg=4326).__geo_interface__,  # WGS84 geometry for web mapping
    # Link rows to shapes through the country name. The positional index of the merged
    # frame does not correspond to the feature ids of the GeoJSON.
    locations='name',
    featureidkey='properties.name',
    color='AvgTemperature',
    color_continuous_scale='YlOrRd',  # Yellow to Red colormap
    range_color=[0, 30],  # Expected range of country means (0°C to 30°C)
    title='Choropleth Map of Average Temperature by Country',
    hover_data=['name', 'AvgTemperature'],
    mapbox_style="carto-positron",
    center={"lat": 0, "lon": 0},  # Center on world
    zoom=1,
    opacity=0.7,
    labels={'AvgTemperature': 'Avg Temp (°C)'}
)

# Update layout for better presentation
fig.update_layout(
    margin={"r":0,"t":40,"l":0,"b":0},
    coloraxis_colorbar_title="Avg Temp (°C)"
)

# Add a note about data coverage
fig.add_annotation(
    text="Note: countries without temperature data are not shaded",
    xref="paper", yref="paper",
    x=0.02, y=0.02,
    showarrow=False,
    font=dict(size=12),
    bgcolor="white", opacity=0.8
)

# Display the finished map
fig.show()



# Discuss the results and what they mean in a markdown section.

Yes, the map answers the question "Are there differences in temperature patterns between regions?" The choropleth visualizes average temperatures across countries and highlights regional variations (e.g., warmer in Africa, cooler in Europe) based on the merged temperature and GeoJSON data.

Yes, the analysis raises new questions, such as: "What factors (e.g., latitude, altitude) explain the temperature variations observed across regions?" and "How have temperature patterns changed over the 1995–2020 period in underrepresented regions?"

In [None]:
# Export the interactive map to a standalone HTML page.
# plotly.js is referenced from the CDN instead of being embedded, which keeps the file small.
output_html = 'choropleth_temperature_map.html'
fig.write_html(file=output_html, include_plotlyjs='cdn')
