In [1]:
import io
import os
import pandas as pd
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.discovery import build
from google.oauth2 import service_account

def download_csv_files_from_folder(folder_id):
    # Set up Google Drive API credentials
    credentials_file = '/Users/micah.mathews/Documents/machinelearning-392616-c2e002ef2244.json'
    scopes = ['https://www.googleapis.com/auth/drive.readonly']
    credentials = service_account.Credentials.from_service_account_file(credentials_file, scopes=scopes)
    drive_service = build('drive', 'v3', credentials=credentials)

    results = drive_service.files().list(q=f"'{folder_id}' in parents and mimeType='text/csv'",
                                         fields='files(id, name)').execute()
    csv_files = results.get('files', [])

    download_directory = "/Users/micah.mathews/Documents/wager_ai/lol_data/data_extraction"

    # Download the updated CSV files, overwriting existing files with the same names
    for file in csv_files:
        file_id = file['id']
        file_name = file['name']
        download_path = os.path.join(download_directory, file_name)

        request = drive_service.files().get_media(fileId=file_id)
        fh = io.FileIO(download_path, mode='wb')
        downloader = MediaIoBaseDownload(fh, request)

        done = False
        while done is False:
            status, done = downloader.next_chunk()

        print(f"Downloaded: {download_path}")

    combined_dfs = []
    
    # Create a combined DataFrame with selected columns
    selected_columns = ['gameid', 'datacompleteness', 'url', 'league', 'year', 'split', 'playoffs', 'date',
                        'game', 'patch', 'side', 'position', 'playername', 'teamname', 'result', 'champion']
    
    for file in csv_files:
        file_name = file['name']
        
        # Check for files named from 2015 to 2023
        if any(f"{year}_LoL_esports_match_data_from_OraclesElixir" in file_name for year in range(2015, 3000)):
            file_path = os.path.join(download_directory, file_name)
            df = pd.read_csv(file_path)
            df = df[selected_columns]
            combined_dfs.append(df)

    combined_df = pd.concat(combined_dfs, ignore_index=True)

    combined_csv_file = "combined_csv_file.csv"
    combined_csv_path = os.path.join(download_directory, combined_csv_file)
    
    combined_df.to_csv(combined_csv_path, index=False)
    print(f"Combined CSV file created: {combined_csv_path}")

folder_id = '1gLSw0RLjBbtaNy0dgnGQDAZOHIgCe-HH'
download_csv_files_from_folder(folder_id)


Downloaded: /Users/micah.mathews/Documents/wager_ai/lol_data/data_extraction/2023_LoL_esports_match_data_from_OraclesElixir.csv
Downloaded: /Users/micah.mathews/Documents/wager_ai/lol_data/data_extraction/2022_LoL_esports_match_data_from_OraclesElixir.csv
Downloaded: /Users/micah.mathews/Documents/wager_ai/lol_data/data_extraction/2021_LoL_esports_match_data_from_OraclesElixir.csv
Downloaded: /Users/micah.mathews/Documents/wager_ai/lol_data/data_extraction/2020_LoL_esports_match_data_from_OraclesElixir.csv
Downloaded: /Users/micah.mathews/Documents/wager_ai/lol_data/data_extraction/2019_LoL_esports_match_data_from_OraclesElixir.csv
Downloaded: /Users/micah.mathews/Documents/wager_ai/lol_data/data_extraction/2018_LoL_esports_match_data_from_OraclesElixir.csv
Downloaded: /Users/micah.mathews/Documents/wager_ai/lol_data/data_extraction/2017_LoL_esports_match_data_from_OraclesElixir.csv
Downloaded: /Users/micah.mathews/Documents/wager_ai/lol_data/data_extraction/2016_LoL_esports_match_data

  df = pd.read_csv(file_path)


Combined CSV file created: /Users/micah.mathews/Documents/wager_ai/lol_data/data_extraction/combined_csv_file.csv
