# Classification de Koppen d'une liste de stations


In [52]:
! git clone https://github.com/nanopiero/tp_python_avance.git

fatal: destination path 'tp_python_avance' already exists and is not an empty directory.


In [3]:
! ls tp_python_avance

koppen.ipynb  NORMALES_mens.data  README.md


In [82]:
import pandas as pd

# Load the .data file into a DataFrame
# Replace 'your_file_path.data' with the actual path to your file
file_path = 'tp_python_avance/NORMALES_mens.data'

# Use the read_csv function, specifying the delimiter and decimal character
df = pd.read_csv(file_path, delimiter=';', dtype={'POSTE': str}, decimal=',')

# Display the first few rows of the DataFrame to check the data
print(df.head())

      POSTE       NOM  ALT  DATE  RR_RRMOY  T_TMMOY
0  01089001  AMBERIEU  250     1      84.9      3.2
1  01089001  AMBERIEU  250     2      70.0      4.2
2  01089001  AMBERIEU  250     3      75.0      8.0
3  01089001  AMBERIEU  250     4      87.2     11.3
4  01089001  AMBERIEU  250     5     106.4     15.2


In [83]:
import pandas as pd

# Rename columns for clarity
df.columns = ['POSTE', 'NOM', 'ALT', 'DATE', 'RR_RRMOY', 'T_TMMOY']

# Create two new columns with month-specific names for precipitation and temperature
df['RR'] = df['DATE'].apply(lambda x: f"{int(x):02d}_RR")  # Formats as '01_RR', '02_RR', etc.
df['T'] = df['DATE'].apply(lambda x: f"{int(x):02d}_T")    # Formats as '01_T', '02_T', etc.

# Pivot for precipitation and temperature, separately
df_rr = df.pivot(index='NOM', columns='RR', values='RR_RRMOY')
df_t = df.pivot(index='NOM', columns='T', values='T_TMMOY')

# Merge the two pivoted DataFrames, keeping 'ALT' as a separate column
df_alt = df[['NOM', 'POSTE', 'ALT']].drop_duplicates().set_index('NOM')
result_df = pd.concat([df_alt, df_rr, df_t], axis=1).reset_index()

result_df['hemisphere'] = \
    result_df.apply(department_and_hemisphere_from_dfrow, axis=1)

# Display the reorganized DataFrame
print(result_df.head())


              NOM     POSTE  ALT  01_RR  02_RR  03_RR  04_RR  05_RR  06_RR  \
0        AMBERIEU  01089001  250   84.9   70.0   75.0   87.2  106.4   88.8   
1      ST QUENTIN  02320001   98   54.1   48.0   51.3   43.2   57.1   59.8   
2  VICHY-CHARMEIL  03060001  249   48.1   37.5   43.5   68.5   88.4   72.7   
3        ST AUBAN  04049001  458   48.2   35.9   44.7   64.8   63.9   53.5   
4          EMBRUN  05046001  873   51.0   42.9   49.5   57.0   69.3   61.1   

   07_RR  ...  05_T  06_T  07_T  08_T  09_T  10_T  11_T  12_T  13_T  \
0   86.0  ...  15.2  19.0  21.1  20.9  16.7  12.6   7.1   3.9  11.9   
1   60.2  ...  13.4  16.2  18.4  18.4  15.2  11.4   6.9   4.1  10.8   
2   75.7  ...  14.4  18.1  20.2  20.1  16.2  12.6   7.5   4.6  11.7   
3   35.7  ...  16.0  20.2  23.1  22.8  18.4  14.0   8.7   5.2  13.4   
4   49.2  ...  14.1  18.0  20.6  20.5  16.1  11.8   6.4   2.9  11.1   

            hemisphere  
0  Northern Hemisphere  
1  Northern Hemisphere  
2  Northern Hemisphere  
3  N

In [84]:
def department_and_hemisphere_from_post_id(poste_id):
    """
    Extracts the French department number from the POSTE ID and determines the hemisphere.

    Args:
        poste_id (str): The POSTE ID as a string.

    Returns:
        tuple: (department_number (str), hemisphere (str))

    Examples:
    >>> department_and_hemisphere('01089001')
    ('01', 'Northern Hemisphere')
    >>> department_and_hemisphere('97320001')
    ('973', 'Northern Hemisphere')
    >>> department_and_hemisphere('98600001')
    ('986', 'Southern Hemisphere')
    >>> department_and_hemisphere('00100000')
    ('00', 'Northern Hemisphere')
    """
    # Extract the first 2 or 3 digits as the department number
    department_number = poste_id[:3] if poste_id[:3] in ['971', '972', '973', '974', '976', '986', '987', '988'] else poste_id[:2]

    # Determine the hemisphere (Northern for mainland France, Southern for French territories like '986')
    southern_departments = {'986', '987', '988'}
    hemisphere = 'Southern Hemisphere' if department_number in southern_departments else 'Northern Hemisphere'

    return department_number, hemisphere


def department_and_hemisphere_from_dfrow(row):
    """
    wrapping of department_and_hemisphere
    """
    poste_id = row['POSTE']
    department_number, hemisphere = department_and_hemisphere_from_post_id(poste_id)
    return hemisphere

In [126]:
# Apply the function to each row in result_df
result_df['koppen_class'] = result_df.apply(koppen_classification_from_dfrow, axis=1)

# Display the updated DataFrame with Köppen class
print(result_df[['NOM', 'ALT', 'koppen_class']].head())


              NOM  ALT koppen_class
0        AMBERIEU  250           Aw
1      ST QUENTIN   98           Aw
2  VICHY-CHARMEIL  249           Aw
3        ST AUBAN  458           Aw
4          EMBRUN  873           Aw


In [127]:
def koppen_classification_from_dfrow(row):
    """
    Wrapper function to compute Köppen classification from a DataFrame row.

    Args:
        row (pd.Series): A row of the DataFrame with relevant climate data.

    Returns:
        str: The full Köppen classification or 'no_class' if it cannot be determined.
    """
    # Extract data from the DataFrame row
    alt = row['ALT']
    monthly_precip = [row[f"{str(month).zfill(2)}_RR"] for month in range(1, 13)]
    monthly_temp = [row[f"{str(month).zfill(2)}_T"] for month in range(1, 13)]
    annual_precip = row.get("13_RR", None)
    annual_temp = row.get("13_T", None)
    hemisphere = row.get("hemisphere", None)
    station_name = row.get('NOM', None)

    # Call the refactored koppen_classification function
    return koppen_classification(
        alt=alt,
        monthly_precip=monthly_precip,
        monthly_temp=monthly_temp,
        annual_precip=annual_precip,
        annual_temp=annual_temp,
        hemisphere=hemisphere,
        station_name=station_name
    )


def koppen_classification(alt, monthly_precip, monthly_temp, annual_precip=None, annual_temp=None, hemisphere=None, station_name=None):
    """
    Determines the full Köppen classification based on input parameters.

    Args:
        alt (float): Altitude of the station.
        monthly_precip (list): List of monthly precipitation totals.
        monthly_temp (list): List of monthly mean temperatures.
        annual_precip (float, optional): Total annual precipitation. Defaults to None.
        annual_temp (float, optional): Mean annual temperature. Defaults to None.
        hemisphere (str, optional): 'Northern Hemisphere' or 'Southern Hemisphere'. Defaults to None.
        station_name (str, optional): Station name for debugging. Defaults to None.

    Returns:
        str: The full Köppen classification or 'no_class' if it cannot be determined.
    """
    try:
        # Extract data from the DataFrame row
        alt = row['ALT']
        monthly_precip = [row[f"{str(month).zfill(2)}_RR"] for month in range(1, 13)]
        monthly_temp = [row[f"{str(month).zfill(2)}_T"] for month in range(1, 13)]
        annual_precip = row.get("13_RR", None)
        annual_temp = row.get("13_T", None)
        hemisphere = row.get("hemisphere", None)  # If hemisphere info is available

        # Initialize classification letters
        class_letters = []

        # Determine the first letter (main climate group)
        try:
            first_letter = koppen_class_first_letter(annual_temp, annual_precip, monthly_temp, monthly_precip)
            class_letters.append(first_letter)
        except Exception as e:
            class_letters.append('no_class')
            return 'no_class'  # Cannot proceed if the first letter fails

        # Determine the second letter (precipitation pattern)
        try:
            class_letters.append(koppen_class_second_letter(first_letter, annual_precip,
                                                            monthly_precip, hemisphere, row['NOM']))
        except Exception as e:
            class_letters.append('no_class')

        # Determine the third letter (temperature characteristics)
        try:
            class_letters.append(koppen_class_third_letter(first_letter, annual_temp, monthly_temp))
        except Exception as e:
            class_letters.append('no_class')

        # Concatenate the letters to form the full classification
        return ''.join([letter for letter in class_letters if letter != 'no_class']) or 'no_class'
    except Exception as e:
        return 'no_class'


def koppen_class_first_letter(annual_temp, annual_precip, monthly_temp, monthly_precip):
    """
    Determines the first letter of the Köppen classification.

    Args:
        annual_temp (float): Annual mean temperature.
        annual_precip (float): Annual precipitation total.
        monthly_temp (list): List of monthly mean temperatures.
        monthly_precip (list): List of monthly precipitation totals.

    Returns:
        str: The first letter of the classification.
    """
    if annual_temp > 18 and min(monthly_temp) > 18:
        return 'A'  # Tropical
    elif annual_precip < (annual_temp * 20 if annual_temp else 0):
        return 'B'  # Arid
    elif -3 <= min(monthly_temp) < 18 and max(monthly_temp) > 10:
        return 'C'  # Temperate
    elif min(monthly_temp) < -3 and max(monthly_temp) > 10:
        return 'D'  # Continental
    elif max(monthly_temp) < 10:
        return 'E'  # Polar
    else:
        raise ValueError("Unable to determine first letter.")


def koppen_class_second_letter(first_letter, annual_precip, monthly_precip,
                               hemisphere, nom):
    """
    Determines the second letter of the Köppen classification based on the first letter.

    Args:
        first_letter (str): The first letter of the classification.
        annual_precip (float): Annual precipitation total.
        monthly_precip (list): List of monthly precipitation totals.
        hemisphere (str): 'Northern Hemisphere' or 'Southern Hemisphere'.

    Returns:
        str: The second letter of the classification.
    """
    if first_letter == 'A':  # Tropical climates
        P_dry = min(monthly_precip)
        if P_dry >= 60:
            return 'f'  # No dry season (rainforest)
        elif P_dry < 60 and P_dry < (100 - annual_precip / 25):
            return 'w'  # Dry winter (savanna)
        elif P_dry < 60 and P_dry >= (100 - annual_precip / 25):
            return 's'  # Dry summer (savanna)

    elif first_letter in {'C', 'D'}:  # Temperate or Continental climates
        if hemisphere == 'Northern Hemisphere':
            P_wet_winter = max(monthly_precip[11:] + monthly_precip[:2])  # Wettest winter month (Dec, Jan, Feb)
            P_dry_winter = min(monthly_precip[11:] + monthly_precip[:2])  # Driest winter month (Dec, Jan, Feb)
            P_wet_summer = max(monthly_precip[5:8])  # Wettest summer month (Jun, Jul, Aug)
            P_dry_summer = min(monthly_precip[5:8])  # Driest summer month (Jun, Jul, Aug)
        else:
            P_wet_winter = max(monthly_precip[5:8])  # Wettest winter month (Jun, Jul, Aug - Southern Hemisphere)
            P_dry_winter = min(monthly_precip[5:8])  # Driest winter month (Jun, Jul, Aug - Southern Hemisphere)
            P_wet_summer = max(monthly_precip[11:] + monthly_precip[:2])  # Wettest summer month (Dec, Jan, Feb - Southern Hemisphere)
            P_dry_summer = min(monthly_precip[11:] + monthly_precip[:2])  # Driest summer month (Dec, Jan, Feb - Southern Hemisphere)

        # Check for 'w' (dry winter)
        if P_dry_winter < P_wet_winter / 10:
            return 'w'

        # Check for 's' (dry summer)
        if (P_dry_summer < 40) and (P_dry_summer < P_wet_winter / 3):
            return 's'

        # If neither 'w' nor 's', it's 'f'
        return 'f'


    elif first_letter == 'B':  # Arid climates
        return 'W' if annual_precip < 250 else 'S'  # Desert or Steppe

    elif first_letter == 'E':  # Polar climates
        return 'T' if max(monthly_precip) > 0 else 'F'  # Tundra or Ice Cap

    else:
        raise ValueError("Unable to determine second letter.")



def koppen_class_third_letter(first_letter, annual_temp, monthly_temp):
    """
    Determines the third letter of the Köppen classification based on the first letter.

    Args:
        first_letter (str): The first letter of the classification.
        annual_temp (float): Annual mean temperature.
        monthly_temp (list): List of monthly mean temperatures.

    Returns:
        str: The third letter of the classification.
    """
    if first_letter == 'A':  # Tropical climates don't have a third letter
        return ''
    elif first_letter == 'B':  # Arid climates
        return 'h' if annual_temp > 18 else 'k'  # Hot or cold
    elif first_letter in {'C', 'D'}:  # Temperate or Continental climates
        if max(monthly_temp) > 22:
            return 'a'  # Hot summer
        elif max(monthly_temp) < 22 and sum(1 for temp in monthly_temp if temp > 10) >= 4:
            return 'b'  # Warm summer
        elif max(monthly_temp) < 22 and sum(1 for temp in monthly_temp if temp > 10) < 4 and min(monthly_temp) > -38:
            return 'c'  # Short and cool summer
        else:
            return 'd'  # Severe winter

    elif first_letter == 'E':  # Polar climates
        return ''  # Polar climates don't have a third letter
    else:
        raise ValueError("Unable to determine third letter.")



In [121]:
# Create the dictionary with lists of station names by koppen_class
koppen_dict = result_df.groupby('koppen_class')['NOM'].apply(list).to_dict()

# Display the dictionary
print(koppen_dict)


{'Af': ['LAMENTIN-AERO', 'MAOPOOPO', 'HIHIFO', 'BORA-BORA-MOTU-AERO', 'MANGAREVA', 'HIVA-OA', 'RAPA', 'TAKAROA', 'OUANAHAM'], 'As': ['LE RAIZET AERO', 'CAYENNE-MATOURY', 'SAINT GEORGES', 'MARIPASOULA', 'GILLOT-AEROPORT', 'FAAA'], 'Aw': ['ST-BARTHELEMY METEO', 'TROMELIN', 'PAMANDZI', 'NOUMEA', 'LA TONTOUTA'], 'Cfa': ['ST AUBAN', 'LANAS SYN', 'CARCASSONNE', 'MONTELIMAR', 'NIMES-COURBESSAC', 'NIMES-GARONS', 'TOULOUSE-BLAGNAC', 'LYON-BRON', 'LYON-ST EXUPERY', 'ALBI', 'MONTAUBAN', 'AVIGNON', 'CARPENTRAS', 'ORANGE'], 'Cfb': ['AMBERIEU', 'ST QUENTIN', 'VICHY-CHARMEIL', 'EMBRUN', 'CHARLEVILLE-MEZ', 'ST GIRONS', 'TROYES-BARBEREY', 'MILLAU', 'RODEZ-AVEYRON', 'CAEN-CARPIQUET', 'ST GATIEN DES B', 'AURILLAC', 'COGNAC', 'LA ROCHELLE-ILE DE RE', 'CHASSIRON', 'BOURGES', 'AVORD', 'BRIVE', 'DIJON-LONGVIC', 'LANNION_AERO', "PLOUMANAC'H", 'ST BRIEUC', 'BERGERAC', 'BESANCON', 'EVREUX-HUEST', 'CHARTRES', 'CHATEAUDUN', 'BREST-GUIPAVAS', 'LANVEOC', 'OUESSANT-STIFF', 'QUIMPER', 'LANDIVISIAU', 'MONT AIGOUAL', '

In [122]:
import re
from collections import defaultdict

# Helper function to extract the town name
import re

def extract_town_name(station_name):
    """
    Extracts the town name from a station name, handling special cases and cleaning it.

    Args:
        station_name (str): The original station name.

    Returns:
        str: The extracted and cleaned town name.

    Examples:
        >>> extract_town_name("L ILE D YEU")
        'ILE D YEU'
        >>> extract_town_name("LYON-St EXUPERY-METEO")
        'LYON-ST EXUPERY'
        >>> extract_town_name("MONT AIGUAL")
        'MONT AIGUAL'
        >>> extract_town_name("PARIS-AERO")
        'PARIS'
    """
    # Convert to uppercase for uniformity
    station_name = station_name.upper()

    # Remove "METEO", "AERO", "AEROPORT" and any dashes or spaces associated with them
    station_name = re.sub(r"(\s?-?\s?(METEO|AERO|AEROPORT)\b)", "", station_name)

    # Handle specific cases
    if station_name.startswith("L ILE"):
        station_name = station_name.replace("L ILE", "ILE")

    # Normalize "Saint" and "St" to "ST"
    station_name = re.sub(r"\b(SAINT|ST)[\-\s\.]*", "ST ", station_name)

    # Preserve compound words like "MONT AIGUAL"
    station_name = re.sub(r"\b(MONT|MT)\b", "MONT", station_name)

    # Remove extra spaces and clean up
    station_name = re.sub(r"\s{2,}", " ", station_name).strip()

    return station_name


# Step 1: Group station names by town
town_groups = defaultdict(list)
for idx, row in result_df.iterrows():
    town_name = extract_town_name(row['NOM'])
    town_groups[town_name].append((row['NOM'], row['koppen_class']))

# Step 2: Check class consistency within each town group and build the new dictionary
consistent_koppen_dict = defaultdict(list)

for town, stations in town_groups.items():
    # Extract classes, ignoring 'no_class'
    classes = set([class_ for _, class_ in stations if class_ != 'no_class'])

    if len(classes) == 1:
        # All classes are consistent, add town to the unique class
        consistent_class = classes.pop() if classes else 'no_class'
        consistent_koppen_dict[consistent_class].append(town)
    else:
        # Inconsistent classes, add town to 'no_class'
        consistent_koppen_dict['no_class'].append(town)

# Display the resulting dictionary
print(dict(consistent_koppen_dict))


{'Cfb': ['AMBERIEU', 'ST QUENTIN', 'VICHY-CHARMEIL', 'EMBRUN', 'CHARLEVILLE-MEZ', 'ST GIRONS', 'TROYES-BARBEREY', 'MILLAU', 'RODEZ-AVEYRON', 'CAEN-CARPIQUET', 'ST GATIEN DES B', 'AURILLAC', 'COGNAC', 'LA ROCHELLE-ILE DE RE', 'CHASSIRON', 'BOURGES', 'AVORD', 'BRIVE', 'DIJON-LONGVIC', 'LANNION_', "PLOUMANAC'H", 'ST BRIEUC', 'BERGERAC', 'BESANCON', 'EVREUX-HUEST', 'CHARTRES', 'CHATEAUDUN', 'BREST-GUIPAVAS', 'LANVEOC', 'OUESSANT-ST IFF', 'QUIMPER', 'LANDIVISIAU', 'MONT AIGOUAL', 'AUCH', 'BORDEAUX-MERIGNAC', 'CAZAUX', 'DINARD', 'RENNES-ST JACQUES', 'CHATEAUROUX DEOLS', 'TOURS', 'GRENOBLE-ST GEOIRS', 'TAVAUX SA', 'BISCARROSSE', 'DAX', 'MONT-DE-MARSAN', 'ROMORANTIN', 'BLOIS', 'ST ETIENNE-BOUTHEON', 'LE PUY-LOUDES', 'NANTES-BOUGUENAIS', 'ST NAZAIRE-MONTOIR', 'ORLEANS', 'GOURDON', 'BEAUCOUZE', 'GONNEVILLE', 'REIMS-PRUNAY', 'LANGRES', 'ST DIZIER', 'LAVAL-ETRONNIER', 'NANCY-OCHEY', 'NANCY-ESSEY', 'BELLE ILE-LE TALUT', 'LORIENT-LANN BIHOUE', 'METZ-NANCY-LORRAINE', 'NEVERS-MARZY', 'DUNKERQUE', 'LIL

In [123]:
def display_koppen_dict(nice_dict, max_words_per_line=5):
    for koppen_class, towns in nice_dict.items():
        print(f"{koppen_class}:")

        # Split the list of towns into lines based on max_words_per_line
        line = []
        for i, town in enumerate(towns, 1):
            line.append(town)
            # Print and clear line every max_words_per_line words, or at the end
            if i % max_words_per_line == 0 or i == len(towns):
                print("    " + ", ".join(line))
                line = []  # Reset line for next batch
        print()  # Blank line between classes

# Display the dictionary with formatted output
display_koppen_dict(consistent_koppen_dict)


Cfb:
    AMBERIEU, ST QUENTIN, VICHY-CHARMEIL, EMBRUN, CHARLEVILLE-MEZ
    ST GIRONS, TROYES-BARBEREY, MILLAU, RODEZ-AVEYRON, CAEN-CARPIQUET
    ST GATIEN DES B, AURILLAC, COGNAC, LA ROCHELLE-ILE DE RE, CHASSIRON
    BOURGES, AVORD, BRIVE, DIJON-LONGVIC, LANNION_
    PLOUMANAC'H, ST BRIEUC, BERGERAC, BESANCON, EVREUX-HUEST
    CHARTRES, CHATEAUDUN, BREST-GUIPAVAS, LANVEOC, OUESSANT-ST IFF
    QUIMPER, LANDIVISIAU, MONT AIGOUAL, AUCH, BORDEAUX-MERIGNAC
    CAZAUX, DINARD, RENNES-ST JACQUES, CHATEAUROUX DEOLS, TOURS
    GRENOBLE-ST GEOIRS, TAVAUX SA, BISCARROSSE, DAX, MONT-DE-MARSAN
    ROMORANTIN, BLOIS, ST ETIENNE-BOUTHEON, LE PUY-LOUDES, NANTES-BOUGUENAIS
    ST NAZAIRE-MONTOIR, ORLEANS, GOURDON, BEAUCOUZE, GONNEVILLE
    REIMS-PRUNAY, LANGRES, ST DIZIER, LAVAL-ETRONNIER, NANCY-OCHEY
    NANCY-ESSEY, BELLE ILE-LE TALUT, LORIENT-LANN BIHOUE, METZ-NANCY-LORRAINE, NEVERS-MARZY
    DUNKERQUE, LILLE-LESQUIN, CREIL, BEAUVAIS-TILLE, ALENCON
    BOULOGNE-SEM, LE-TOUQUET, CLERMONT-FD, BIARRITZ

In [128]:
def koppen_classification_from_dfrow(row):
    """
    Wrapper function to compute Köppen classification from a DataFrame row using the Koppen class.

    Args:
        row (pd.Series): A row of the DataFrame with relevant climate data.

    Returns:
        str: The full Köppen classification or 'no_class' if it cannot be determined.
    """
    # Extract data from the DataFrame row
    koppen_instance = Koppen(
        alt=row['ALT'],
        monthly_precip=[row[f"{str(month).zfill(2)}_RR"] for month in range(1, 13)],
        monthly_temp=[row[f"{str(month).zfill(2)}_T"] for month in range(1, 13)],
        annual_precip=row.get("13_RR", None),
        annual_temp=row.get("13_T", None),
        hemisphere=row.get("hemisphere", None),
        station_name=row.get('NOM', None)
    )

    # Compute the classification
    return koppen_instance.classify()
