# Classification de Koppen d'une liste de stations


In [3]:
! git clone https://github.com/nanopiero/tp_python_avance.git

Cloning into 'tp_python_avance'...
remote: Enumerating objects: 9, done.[K
remote: Counting objects:  11% (1/9)[Kremote: Counting objects:  22% (2/9)[Kremote: Counting objects:  33% (3/9)[Kremote: Counting objects:  44% (4/9)[Kremote: Counting objects:  55% (5/9)[Kremote: Counting objects:  66% (6/9)[Kremote: Counting objects:  77% (7/9)[Kremote: Counting objects:  88% (8/9)[Kremote: Counting objects: 100% (9/9)[Kremote: Counting objects: 100% (9/9), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 9 (delta 1), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (9/9), 23.99 KiB | 11.99 MiB/s, done.
Resolving deltas: 100% (1/1), done.


In [5]:
! ls tp_python_avance

koppen.ipynb  NORMALES_mens.data  README.md


In [6]:
import pandas as pd

# Load the .data file into a DataFrame
# Replace 'your_file_path.data' with the actual path to your file
file_path = 'tp_python_avance/NORMALES_mens.data'

# Use the read_csv function, specifying the delimiter and decimal character
df = pd.read_csv(file_path, delimiter=';', decimal=',')

# Display the first few rows of the DataFrame to check the data
print(df.head())

     POSTE       NOM  ALT  DATE  RR_RRMOY  T_TMMOY
0  1089001  AMBERIEU  250     1      84.9      3.2
1  1089001  AMBERIEU  250     2      70.0      4.2
2  1089001  AMBERIEU  250     3      75.0      8.0
3  1089001  AMBERIEU  250     4      87.2     11.3
4  1089001  AMBERIEU  250     5     106.4     15.2


In [7]:
import pandas as pd

# Rename columns for clarity
df.columns = ['POSTE', 'NOM', 'ALT', 'DATE', 'RR_RRMOY', 'T_TMMOY']

# Create two new columns with month-specific names for precipitation and temperature
df['RR'] = df['DATE'].apply(lambda x: f"{int(x):02d}_RR")  # Formats as '01_RR', '02_RR', etc.
df['T'] = df['DATE'].apply(lambda x: f"{int(x):02d}_T")    # Formats as '01_T', '02_T', etc.

# Pivot for precipitation and temperature, separately
df_rr = df.pivot(index='NOM', columns='RR', values='RR_RRMOY')
df_t = df.pivot(index='NOM', columns='T', values='T_TMMOY')

# Merge the two pivoted DataFrames, keeping 'ALT' as a separate column
df_alt = df[['NOM', 'ALT']].drop_duplicates().set_index('NOM')
result_df = pd.concat([df_alt, df_rr, df_t], axis=1).reset_index()

# Display the reorganized DataFrame
print(result_df.head())


              NOM  ALT  01_RR  02_RR  03_RR  04_RR  05_RR  06_RR  07_RR  \
0        AMBERIEU  250   84.9   70.0   75.0   87.2  106.4   88.8   86.0   
1      ST QUENTIN   98   54.1   48.0   51.3   43.2   57.1   59.8   60.2   
2  VICHY-CHARMEIL  249   48.1   37.5   43.5   68.5   88.4   72.7   75.7   
3        ST AUBAN  458   48.2   35.9   44.7   64.8   63.9   53.5   35.7   
4          EMBRUN  873   51.0   42.9   49.5   57.0   69.3   61.1   49.2   

   08_RR  ...  04_T  05_T  06_T  07_T  08_T  09_T  10_T  11_T  12_T  13_T  
0   83.0  ...  11.3  15.2  19.0  21.1  20.9  16.7  12.6   7.1   3.9  11.9  
1   70.8  ...  10.0  13.4  16.2  18.4  18.4  15.2  11.4   6.9   4.1  10.8  
2   76.1  ...  10.5  14.4  18.1  20.2  20.1  16.2  12.6   7.5   4.6  11.7  
3   50.7  ...  12.0  16.0  20.2  23.1  22.8  18.4  14.0   8.7   5.2  13.4  
4   52.1  ...  10.2  14.1  18.0  20.6  20.5  16.1  11.8   6.4   2.9  11.1  

[5 rows x 28 columns]


In [9]:
import numpy as np

def koppen_class(row):
    # Retrieve altitude, monthly data, and annual data where available
    alt = row['ALT']
    monthly_precip = [row[f"{str(month).zfill(2)}_RR"] for month in range(1, 13)]
    monthly_temp = [row[f"{str(month).zfill(2)}_T"] for month in range(1, 13)]
    annual_precip = row.get("13_RR", np.nan)
    annual_temp = row.get("13_T", np.nan)

    # Check for NaN or None values
    if any(pd.isna(val) for val in monthly_precip + monthly_temp + [annual_precip, annual_temp]):
        return 'no_class'

    # Calculate mean annual temperature and total annual precipitation
    mean_annual_temp = np.mean(monthly_temp)
    total_annual_precip = sum(monthly_precip)

    # Classification based on Köppen criteria
    if mean_annual_temp > 18 and min(monthly_temp) > 18:
        # Tropical climates (A)
        if total_annual_precip >= 60 * 12:
            return 'Af'  # Tropical rainforest (no dry season)
        elif min(monthly_precip) < 60 and sum(monthly_precip[:3]) < 60:
            return 'Aw'  # Tropical savanna (dry winter)
        elif min(monthly_precip) < 60:
            return 'Am'  # Tropical monsoon
    elif total_annual_precip < (mean_annual_temp * 20):
        # Arid climates (B)
        threshold = mean_annual_temp * 20
        if total_annual_precip < threshold / 2:
            return 'BWh' if mean_annual_temp > 18 else 'BWk'  # Hot or cold desert
        else:
            return 'BSh' if mean_annual_temp > 18 else 'BSk'  # Hot or cold steppe
    elif -3 <= min(monthly_temp) < 18 and max(monthly_temp) > 10:
        # Temperate climates (C)
        if min(monthly_precip[5:8]) > max(monthly_precip[:5] + monthly_precip[8:]):
            return 'Csa' if max(monthly_temp) > 22 else 'Csb'  # Mediterranean (dry summer)
        elif max(monthly_precip[:5] + monthly_precip[8:]) > min(monthly_precip[5:8]):
            return 'Cwa' if max(monthly_temp) > 22 else 'Cwb'  # Monsoon-influenced humid subtropical
        else:
            return 'Cfa' if max(monthly_temp) > 22 else 'Cfb'  # Oceanic
    elif min(monthly_temp) < -3 and max(monthly_temp) > 10:
        # Continental climates (D)
        if min(monthly_precip[5:8]) > max(monthly_precip[:5] + monthly_precip[8:]):
            return 'Dsa' if max(monthly_temp) > 22 else 'Dsb'  # Continental Mediterranean
        elif max(monthly_precip[:5] + monthly_precip[8:]) > min(monthly_precip[5:8]):
            return 'Dwa' if max(monthly_temp) > 22 else 'Dwb'  # Continental monsoon
        else:
            return 'Dfa' if max(monthly_temp) > 22 else 'Dfb'  # Humid continental
    elif max(monthly_temp) < 10:
        # Polar climates (E)
        return 'ET' if max(monthly_temp) > 0 else 'EF'  # Tundra or Ice cap

    return 'no_class'

# Apply the function to each row in result_df
result_df['koppen_class'] = result_df.apply(koppen_class, axis=1)

# Display the updated DataFrame with Köppen class
print(result_df[['NOM', 'ALT', 'koppen_class']].head())


              NOM  ALT koppen_class
0        AMBERIEU  250          Cwb
1      ST QUENTIN   98          Cwb
2  VICHY-CHARMEIL  249          Cwb
3        ST AUBAN  458          Cwa
4          EMBRUN  873          Cwb


In [10]:
# Create the dictionary with lists of station names by koppen_class
koppen_dict = result_df.groupby('koppen_class')['NOM'].apply(list).to_dict()

# Display the dictionary
print(koppen_dict)


{'Af': ['LE RAIZET AERO', 'ST-BARTHELEMY METEO', 'LAMENTIN-AERO', 'CAYENNE-MATOURY', 'SAINT GEORGES', 'MARIPASOULA', 'GILLOT-AEROPORT', 'TROMELIN', 'PAMANDZI', 'MAOPOOPO', 'HIHIFO', 'BORA-BORA-MOTU-AERO', 'FAAA', 'MANGAREVA', 'HIVA-OA', 'RAPA', 'TAKAROA', 'OUANAHAM', 'NOUMEA', 'LA TONTOUTA'], 'Cwa': ['ST AUBAN', 'CANNES', 'NICE', 'LANAS SYN', 'CARCASSONNE', 'ISTRES', 'MARIGNANE', 'SALON DE PROVENCE', 'AJACCIO', 'CAP PERTUSATO', 'CALVI', 'ILE ROUSSE', 'FIGARI', 'BASTIA', 'SOLENZARA', 'MONTELIMAR', 'NIMES-COURBESSAC', 'NIMES-GARONS', 'TOULOUSE-BLAGNAC', 'MONTPELLIER-AEROPORT', 'BEZIERS-VIAS', 'SETE', 'PERPIGNAN', 'LYON-BRON', 'LYON-ST EXUPERY', 'ALBI', 'MONTAUBAN', 'LE LUC', 'HYERES', 'ILE DU LEVANT', 'AVIGNON', 'CARPENTRAS', 'ORANGE'], 'Cwb': ['AMBERIEU', 'ST QUENTIN', 'VICHY-CHARMEIL', 'EMBRUN', 'CHARLEVILLE-MEZ', 'ST GIRONS', 'TROYES-BARBEREY', 'MILLAU', 'RODEZ-AVEYRON', 'CAEN-CARPIQUET', 'ST GATIEN DES B', 'AURILLAC', 'COGNAC', 'LA ROCHELLE-ILE DE RE', 'CHASSIRON', 'BOURGES', 'AVORD'

In [11]:
import re
from collections import defaultdict

# Helper function to extract the town name
def extract_town_name(station_name):
    # Normalize "Saint" variations
    match = re.match(r"(Saint[\-\s\.]*|St[\-\.\s]*)?([A-Za-z\-]+)", station_name, re.IGNORECASE)
    if match:
        # Combine 'Saint' with the rest of the name if present
        return match.group(0).strip()
    return station_name.split()[0]

# Step 1: Group station names by town
town_groups = defaultdict(list)
for idx, row in result_df.iterrows():
    town_name = extract_town_name(row['NOM'])
    town_groups[town_name].append((row['NOM'], row['koppen_class']))

# Step 2: Check class consistency within each town group and build the new dictionary
consistent_koppen_dict = defaultdict(list)

for town, stations in town_groups.items():
    # Extract classes, ignoring 'no_class'
    classes = set([class_ for _, class_ in stations if class_ != 'no_class'])

    if len(classes) == 1:
        # All classes are consistent, add town to the unique class
        consistent_class = classes.pop() if classes else 'no_class'
        consistent_koppen_dict[consistent_class].append(town)
    else:
        # Inconsistent classes, add town to 'no_class'
        consistent_koppen_dict['no_class'].append(town)

# Display the resulting dictionary
print(dict(consistent_koppen_dict))


{'Cwb': ['AMBERIEU', 'ST QUENTIN', 'VICHY-CHARMEIL', 'EMBRUN', 'CHARLEVILLE-MEZ', 'ST GIRONS', 'TROYES-BARBEREY', 'MILLAU', 'RODEZ-AVEYRON', 'CAEN-CARPIQUET', 'ST GATIEN', 'AURILLAC', 'COGNAC', 'CHASSIRON', 'BOURGES', 'AVORD', 'BRIVE', 'DIJON-LONGVIC', 'LANNION', 'PLOUMANAC', 'ST BRIEUC', 'BERGERAC', 'BESANCON', 'EVREUX-HUEST', 'CHARTRES', 'CHATEAUDUN', 'BREST-GUIPAVAS', 'LANVEOC', 'OUESSANT-STIFF', 'QUIMPER', 'LANDIVISIAU', 'MONT', 'AUCH', 'BORDEAUX-MERIGNAC', 'CAZAUX', 'DINARD', 'RENNES-ST', 'CHATEAUROUX', 'TOURS', 'GRENOBLE-ST', 'TAVAUX', 'BISCARROSSE', 'DAX', 'MONT-DE-MARSAN', 'ROMORANTIN', 'BLOIS', 'ST ETIENNE-BOUTHEON', 'NANTES-BOUGUENAIS', 'ST NAZAIRE-MONTOIR', 'ORLEANS', 'GOURDON', 'AGEN-LA', 'BEAUCOUZE', 'GONNEVILLE', 'LANGRES', 'ST-DIZIER', 'LAVAL-ETRONNIER', 'NANCY-OCHEY', 'NANCY-ESSEY', 'BELLE', 'LORIENT-LANN', 'AEROPORT', 'NEVERS-MARZY', 'DUNKERQUE', 'LILLE-LESQUIN', 'CREIL', 'BEAUVAIS-TILLE', 'ALENCON', 'BOULOGNE-SEM', 'LE-TOUQUET', 'CLERMONT-FD', 'BIARRITZ-PAYS-BASQUE', 

In [12]:
def display_koppen_dict(nice_dict, max_words_per_line=5):
    for koppen_class, towns in nice_dict.items():
        print(f"{koppen_class}:")

        # Split the list of towns into lines based on max_words_per_line
        line = []
        for i, town in enumerate(towns, 1):
            line.append(town)
            # Print and clear line every max_words_per_line words, or at the end
            if i % max_words_per_line == 0 or i == len(towns):
                print("    " + ", ".join(line))
                line = []  # Reset line for next batch
        print()  # Blank line between classes

# Display the dictionary with formatted output
display_koppen_dict(consistent_koppen_dict)


Cwb:
    AMBERIEU, ST QUENTIN, VICHY-CHARMEIL, EMBRUN, CHARLEVILLE-MEZ
    ST GIRONS, TROYES-BARBEREY, MILLAU, RODEZ-AVEYRON, CAEN-CARPIQUET
    ST GATIEN, AURILLAC, COGNAC, CHASSIRON, BOURGES
    AVORD, BRIVE, DIJON-LONGVIC, LANNION, PLOUMANAC
    ST BRIEUC, BERGERAC, BESANCON, EVREUX-HUEST, CHARTRES
    CHATEAUDUN, BREST-GUIPAVAS, LANVEOC, OUESSANT-STIFF, QUIMPER
    LANDIVISIAU, MONT, AUCH, BORDEAUX-MERIGNAC, CAZAUX
    DINARD, RENNES-ST, CHATEAUROUX, TOURS, GRENOBLE-ST
    TAVAUX, BISCARROSSE, DAX, MONT-DE-MARSAN, ROMORANTIN
    BLOIS, ST ETIENNE-BOUTHEON, NANTES-BOUGUENAIS, ST NAZAIRE-MONTOIR, ORLEANS
    GOURDON, AGEN-LA, BEAUCOUZE, GONNEVILLE, LANGRES
    ST-DIZIER, LAVAL-ETRONNIER, NANCY-OCHEY, NANCY-ESSEY, BELLE
    LORIENT-LANN, AEROPORT, NEVERS-MARZY, DUNKERQUE, LILLE-LESQUIN
    CREIL, BEAUVAIS-TILLE, ALENCON, BOULOGNE-SEM, LE-TOUQUET
    CLERMONT-FD, BIARRITZ-PAYS-BASQUE, SOCOA, PAU-UZEIN, TARBES-LOURDES-PYRENEES
    STRASBOURG-ENTZHEIM, COLMAR-MEYENHEIM, BALE-MULHOUSE, LU