In [1]:
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder

In [2]:

# Build one match-level CSV per league from the raw season exports.
def _extract_matches(df_league):
    """Return one dict per game in ``df_league`` that has both sides.

    Rows are grouped by ``gameid``. A game is kept only when it has at least
    one ``Blue`` row and one ``Red`` row; the first row of each side supplies
    the team name, league, date and result.
    """
    matches = []
    for gameid, group in df_league.groupby('gameid'):
        blue = group[group['side'] == 'Blue']
        red = group[group['side'] == 'Red']

        # Skip games where one of the two sides is absent.
        if blue.empty or red.empty:
            continue

        blue_name = blue['teamname'].values[0]
        red_name = red['teamname'].values[0]
        matches.append({
            'match_id': gameid,
            'team_1': blue_name,
            'team_2': red_name,
            'league': blue['league'].values[0],
            'date': blue['date'].values[0],
            # Any Blue result other than 1 (including a missing value) is
            # recorded as a Red win.
            'winner': blue_name if blue['result'].values[0] == 1 else red_name,
        })
    return matches


def process_seasons(file_paths, season_number):
    """Write one CSV of matches per league from raw season CSV files.

    Every file in ``file_paths`` is read and the rows are concatenated. For
    each of the leagues CBLOL, LEC, LCK and LPL that has at least one row,
    the rows are grouped by ``gameid`` and each game that has both a ``Blue``
    and a ``Red`` side becomes one output row with the columns ``match_id``,
    ``team_1`` (Blue), ``team_2`` (Red), ``league``, ``date`` and ``winner``.
    The result is saved to
    ``../../repository/<league lowercased>/<league>_matches_<season_number>.csv``
    (relative to the current working directory), replacing any existing file.
    Leagues without any row produce no file.

    Args:
        file_paths: Paths of the input CSV files. They must provide the
            columns ``gameid``, ``league``, ``side``, ``teamname``,
            ``result`` and ``date``.
        season_number: Suffix placed in the output file names.

    Raises:
        ValueError: If ``file_paths`` is empty (raised by ``pd.concat``).
    """
    dtype_spec = {
        'gameid': str,
        'datacompleteness': str,
        'url': str,
        'league': str,
        'teamname': str,
        'result': float,  # numeric column
    }
    # Fixed column order so that a league without any complete game still
    # produces a CSV with a header row instead of a header-less file.
    match_columns = ['match_id', 'team_1', 'team_2', 'league', 'date', 'winner']

    # Read every input file and stack them into a single DataFrame.
    df_list = [pd.read_csv(file_path, dtype=dtype_spec) for file_path in file_paths]
    df = pd.concat(df_list, ignore_index=True)

    leagues = ['CBLOL', 'LEC', 'LCK', 'LPL']  # leagues to export

    for league in leagues:
        df_league = df[df['league'] == league]
        if df_league.empty:  # nothing to export for this league
            continue

        matches_df = pd.DataFrame(_extract_matches(df_league), columns=match_columns)

        # Output location for the current league.
        output_folder = f"../../repository/{league.lower()}/"
        output_file = os.path.join(output_folder, f"{league}_matches_{season_number}.csv")

        os.makedirs(output_folder, exist_ok=True)

        # to_csv opens the target in write mode, so an existing file is
        # overwritten without having to delete it first.
        matches_df.to_csv(output_file, index=False)

# Single input file (2023 export); output file names carry the suffix "2".
file_paths_single_season = [
    '../../../data/2023_LoL_esports_match_data_from_OraclesElixir.csv',
]
process_seasons(file_paths=file_paths_single_season, season_number="2")

# Two input files (2023 and 2022 exports combined); output file names carry
# the suffix "4".
file_paths_two_seasons = [
    '../../../data/2023_LoL_esports_match_data_from_OraclesElixir.csv',
    '../../../data/2022_LoL_esports_match_data_from_OraclesElixir.csv',
]
process_seasons(file_paths=file_paths_two_seasons, season_number="4")
        
    
