# Data Restructure

We will now combine our data and convert it into a more visualization-friendly long format.

## Start by combining files in a folder

We will follow these steps:
1. collect all games/lobbies of the same round
2. collect all rounds of the same level
3. collect all levels
4. collect all regions

In [2]:
import os
import pandas as pd

In [2]:
# Peek at one cleaned lobby file before combining anything.
directory = '../Outputs/APAC_North/Preseason_Qualifier_1/Round_1/Cleaned'
sample_csv = f"{directory}/Lobby 1_cleaned.csv"

df = pd.read_csv(sample_csv)
df.head()  # kept as the last expression so the notebook renders the preview

Unnamed: 0,Standings,Team.1,Total
0,1.0,FC Destroy,112
1,2.0,Dps,38
2,3.0,CJAM,32
3,4.0,sspZ,31
4,5.0,Joel Mark,24


In [6]:
# Tag every row with the lobby it came from, so the origin is still known
# after the per-lobby files are stacked into one table.
df = df.assign(Lobby='Lobby 1')
df.head()

Unnamed: 0,Standings,Team.1,Total,Lobby
0,1.0,FC Destroy,112,Lobby 1
1,2.0,Dps,38,Lobby 1
2,3.0,CJAM,32,Lobby 1
3,4.0,sspZ,31,Lobby 1
4,5.0,Joel Mark,24,Lobby 1


In [13]:
# Repeat for all files in the folder.
# Frames are collected in a list and concatenated once at the end instead of
# re-copying the growing table on every iteration.
lobby_frames = []

for file in os.listdir(directory):
    if file == 'Combined':
        # Output folder created by a later cell. It is a directory, so
        # read_csv would fail on it when this cell is re-run.
        continue
    temp = pd.read_csv(f"{directory}/{file}")
    # "Lobby 1_cleaned.csv" -> "Lobby 1" (we don't want the suffix)
    temp['Lobby'] = str(file).strip("'").replace('_cleaned.csv', '')
    lobby_frames.append(temp)

# pd.concat rejects an empty list, so fall back to an empty frame.
all_lobbies = pd.concat(lobby_frames) if lobby_frames else pd.DataFrame()

In [15]:
# Make sure the output folder for the combined file is present.
combined_dir = f'{directory}/Combined'
already_present = os.path.exists(combined_dir)
if not already_present:
    os.mkdir(combined_dir)

In [16]:
# Persist the stacked lobbies. index=False is not passed here, so the
# DataFrame index is written as an extra leading column.
combined_csv = f'{directory}/Combined/all_lobbies.csv'
all_lobbies.to_csv(combined_csv)

In [73]:
def folder_gen(directory):
    """Create ``<directory>/Combined`` unless that path already exists.

    Only the final ``Combined`` level is created (``os.mkdir``), so a missing
    ``directory`` still raises ``FileNotFoundError``; callers in this notebook
    catch that to skip rounds that were never played.
    """
    combined_dir = f'{directory}/Combined'
    if os.path.exists(combined_dir):
        return
    os.mkdir(combined_dir)
        
def lobby_combiner(directory):
    """Stack every lobby CSV in ``directory`` into one DataFrame.

    Each file's rows get a ``Lobby`` column holding the file name with the
    ``_cleaned.csv`` suffix removed (``"Lobby 1_cleaned.csv"`` -> ``"Lobby 1"``).

    Entries that are not regular ``.csv`` files are skipped: the ``Combined``
    output folder, and strays such as ``.ipynb_checkpoints`` or ``.DS_Store``
    that would otherwise make ``read_csv`` fail. Files are read in sorted name
    order so the result does not depend on the order ``os.listdir`` returns.

    Parameters
    ----------
    directory : str
        Folder containing the per-lobby cleaned CSV files.

    Returns
    -------
    pandas.DataFrame
        All lobbies stacked row-wise with their original row indices kept;
        an empty DataFrame when the folder has no lobby files.
    """
    frames = []

    for file in sorted(os.listdir(directory)):
        path = f"{directory}/{file}"
        if file == 'Combined' or not file.endswith('.csv') or not os.path.isfile(path):
            continue
        temp = pd.read_csv(path)
        temp['Lobby'] = str(file).strip("'").replace('_cleaned.csv', '')  # drop the suffix
        frames.append(temp)

    if not frames:
        # pd.concat raises on an empty list; callers expect an empty frame.
        return pd.DataFrame()
    # One concat at the end avoids re-copying the growing table per file.
    return pd.concat(frames)

def round_combiner(region, split, circuit, round_):
    """Load one round's ``all_second.csv`` and tag it with its region.

    The file is read from ``../Outputs/<region>/<split>/<circuit>/<round_>/``.
    The frame is returned so the caller can accumulate results; the earlier
    draft concatenated onto ``all_region_pql``, which is not defined inside
    the function, and returned nothing.

    NOTE(review): the ``all_second.csv`` file name looks carried over from the
    Preseason Qualifier cells -- confirm it is the intended input here.

    Parameters
    ----------
    region, split, circuit, round_ : str
        Path components identifying the round folder.

    Returns
    -------
    pandas.DataFrame
        The file's rows with a ``Region`` column set to ``region``.
    """
    outer_dir = f'../Outputs/{region}/{split}/{circuit}'
    directory = f'{outer_dir}/{round_}'

    temp = pd.read_csv(f"{directory}/all_second.csv")
    temp['Region'] = region
    return temp

In [19]:
rounds = ['Round_1', 'Round_2', 'Round_3','Quarterfinals', 'Semifinals', 'Finals']

# Build one combined CSV per round of APAC North Preseason Qualifier 1.
for r in rounds:
    directory = f'../Outputs/APAC_North/Preseason_Qualifier_1/{r}/Cleaned'

    folder_gen(directory)  # make sure <round>/Cleaned/Combined exists

    final_df = lobby_combiner(directory)
    # index=False matches the other combine cells in this notebook; without it
    # the row index is written and comes back as an "Unnamed: 0" column.
    final_df.to_csv(f'{directory}/Combined/{r}_combined.csv', index=False)

In [63]:
regions = ["APAC_North", "APAC_South", "EMEA", "North_America", "South_America"]
pqls = ['Preseason_Qualifier_1', 'Preseason_Qualifier_2', 'Preseason_Qualifier_3', 'Preseason_Qualifier_4']
rounds = ['Round_1', 'Round_2', 'Round_3','Quarterfinals', 'Semifinals', 'Finals']

# Every (region, qualifier, round) "Cleaned" folder, in the same order the
# nested loops would visit them, paired with the round name for the output file.
cleaned_dirs = [
    (r, f'../Outputs/{region}/{pql}/{r}/Cleaned')
    for region in regions
    for pql in pqls
    for r in rounds
]

for r, directory in cleaned_dirs:
    folder_gen(directory)  # make sure the Combined output folder exists

    final_df = lobby_combiner(directory)
    final_df.to_csv(f'{directory}/Combined/{r}_combined.csv', index=False)

Now combine the rounds of each PQL into that PQL's folder.

In [50]:
# PQL 1 has a different format for Rounds 1-3 and QF -> Finals, so the two
# halves are stacked into separate files.
outter_dir = f'../Outputs/APAC_North/Preseason_Qualifier_1'
first_rounds = ['Round_1', 'Round_2', 'Round_3']
second_rounds = ['Quarterfinals', 'Semifinals', 'Finals']

first_frames = []
second_frames = []

for round_names, frames in ((first_rounds, first_frames),
                            (second_rounds, second_frames)):
    for r in round_names:
        directory = f'{outter_dir}/{r}/Cleaned/Combined'
        try:
            temp = pd.read_csv(f"{directory}/{r}_combined.csv")
        except pd.errors.EmptyDataError:
            # The combined file has nothing to parse (presumably a round with
            # no lobby files). Both halves skip it, as the sibling cells do.
            continue
        temp['Round'] = f'{r}'
        frames.append(temp)

# Concatenate once per half; pd.concat rejects an empty list.
all_first = pd.concat(first_frames) if first_frames else pd.DataFrame()
all_second = pd.concat(second_frames) if second_frames else pd.DataFrame()

all_first.to_csv(f'{outter_dir}/all_first.csv', index=False)
all_second.to_csv(f'{outter_dir}/all_second.csv',index=False)

In [55]:
# For every region, stack Preseason Qualifier 1 into two files: the early
# rounds (all_first.csv) and the later rounds (all_second.csv).
regions = ["APAC_North", "APAC_South", "EMEA", "North_America", "South_America"]
pqls = ['Preseason_Qualifier_2', 'Preseason_Qualifier_3', 'Preseason_Qualifier_4']  # not read in this cell
first_rounds = ['Round_1', 'Round_2', 'Round_3']
second_rounds = ['Quarterfinals', 'Semifinals', 'Finals']

for region in regions:
    outter_dir = f'../Outputs/{region}/Preseason_Qualifier_1'
    first_frames, second_frames = [], []

    for round_names, frames in ((first_rounds, first_frames),
                                (second_rounds, second_frames)):
        for r in round_names:
            directory = f'{outter_dir}/{r}/Cleaned/Combined'
            try:
                temp = pd.read_csv(f"{directory}/{r}_combined.csv")
            except pd.errors.EmptyDataError:
                continue  # nothing parseable in this round's file
            temp['Round'] = r
            frames.append(temp)

    all_first = pd.concat(first_frames) if first_frames else pd.DataFrame()
    all_second = pd.concat(second_frames) if second_frames else pd.DataFrame()

    all_first.to_csv(f'{outter_dir}/all_first.csv', index=False)
    all_second.to_csv(f'{outter_dir}/all_second.csv', index=False)

In [58]:
# PQL 2-4 are fine: all six rounds go into a single all_rounds.csv per
# region and qualifier.
regions = ["APAC_North", "APAC_South", "EMEA", "North_America", "South_America"]
pqls = ['Preseason_Qualifier_2', 'Preseason_Qualifier_3', 'Preseason_Qualifier_4']
rounds = ['Round_1', 'Round_2', 'Round_3','Quarterfinals', 'Semifinals', 'Finals']

for region in regions:
    for pql in pqls:
        outter_dir = f'../Outputs/{region}/{pql}'
        round_frames = []

        for r in rounds:
            directory = f'{outter_dir}/{r}/Cleaned/Combined'
            try:
                temp = pd.read_csv(f"{directory}/{r}_combined.csv")
            except pd.errors.EmptyDataError:
                continue  # nothing parseable in this round's file
            temp['Round'] = r
            round_frames.append(temp)

        all_rounds = pd.concat(round_frames) if round_frames else pd.DataFrame()
        all_rounds.to_csv(f'{outter_dir}/all_rounds.csv', index=False)

In [23]:
## SPECIALTY TWEAKS FOR EMEA PQ 1 ROUND 2 AND UP
# Rebuild the Round_2 combined file for EMEA. folder_gen is not called here,
# so the Combined folder must already exist.
directory = "../Outputs/EMEA/Preseason_Qualifier_1/Round_2/Cleaned"
round_2_csv = f"{directory}/Combined/Round_2_combined.csv"

final_df = lobby_combiner(directory)
final_df.to_csv(round_2_csv, index=False)

In [30]:
# EMEA only: Round_1 alone forms the first half; Round_2 onwards is stacked
# with the later rounds.
regions = ["EMEA"]
first_rounds = ['Round_1']
second_rounds = ['Round_2','Quarterfinals', 'Semifinals', 'Finals']


for region in regions:
    outter_dir = f'../Outputs/{region}/Preseason_Qualifier_1'
    first_frames, second_frames = [], []

    for round_names, frames in ((first_rounds, first_frames),
                                (second_rounds, second_frames)):
        for r in round_names:
            directory = f'{outter_dir}/{r}/Cleaned/Combined'
            try:
                temp = pd.read_csv(f"{directory}/{r}_combined.csv")
            except pd.errors.EmptyDataError:
                continue  # nothing parseable in this round's file
            temp['Round'] = r
            frames.append(temp)

    all_first = pd.concat(first_frames) if first_frames else pd.DataFrame()
    all_second = pd.concat(second_frames) if second_frames else pd.DataFrame()

    all_first.to_csv(f'{outter_dir}/all_first.csv', index=False)
    all_second.to_csv(f'{outter_dir}/all_second.csv', index=False)

In [38]:
## SPECIALTY TWEAKS FOR SOUTH AM PQ 1 ROUND 1 TO SEMIS
# Rebuild the per-round combined files for these three rounds. folder_gen is
# not called here, so each Combined folder must already exist.
rounds = ['Round_1', 'Quarterfinals', 'Semifinals']

for r in rounds:
    directory = f"../Outputs/South_America/Preseason_Qualifier_1/{r}/Cleaned"
    round_csv = f"{directory}/Combined/{r}_combined.csv"

    final_df = lobby_combiner(directory)
    final_df.to_csv(round_csv, index=False)

In [64]:
# South America only: Round_1 through Semifinals form the first half; only the
# Finals go into the second half.
regions = ["South_America"]
first_rounds = ['Round_1', 'Quarterfinals', 'Semifinals']
second_rounds = ['Finals']


for region in regions:
    outter_dir = f'../Outputs/{region}/Preseason_Qualifier_1'
    first_frames, second_frames = [], []

    for round_names, frames in ((first_rounds, first_frames),
                                (second_rounds, second_frames)):
        for r in round_names:
            directory = f'{outter_dir}/{r}/Cleaned/Combined'
            try:
                temp = pd.read_csv(f"{directory}/{r}_combined.csv")
            except pd.errors.EmptyDataError:
                continue  # nothing parseable in this round's file
            temp['Round'] = r
            frames.append(temp)

    all_first = pd.concat(first_frames) if first_frames else pd.DataFrame()
    all_second = pd.concat(second_frames) if second_frames else pd.DataFrame()

    all_first.to_csv(f'{outter_dir}/all_first.csv', index=False)
    all_second.to_csv(f'{outter_dir}/all_second.csv', index=False)

In [65]:
## APAC SOUTH R1 and QF
# APAC South only: Round_1 and Quarterfinals form the first half; Semifinals
# and Finals form the second half.
regions = ["APAC_South"]
first_rounds = ['Round_1', 'Quarterfinals']
second_rounds = ['Semifinals', 'Finals']


for region in regions:
    outter_dir = f'../Outputs/{region}/Preseason_Qualifier_1'
    first_frames, second_frames = [], []

    for round_names, frames in ((first_rounds, first_frames),
                                (second_rounds, second_frames)):
        for r in round_names:
            directory = f'{outter_dir}/{r}/Cleaned/Combined'
            try:
                temp = pd.read_csv(f"{directory}/{r}_combined.csv")
            except pd.errors.EmptyDataError:
                continue  # nothing parseable in this round's file
            temp['Round'] = r
            frames.append(temp)

    all_first = pd.concat(first_frames) if first_frames else pd.DataFrame()
    all_second = pd.concat(second_frames) if second_frames else pd.DataFrame()

    all_first.to_csv(f'{outter_dir}/all_first.csv', index=False)
    all_second.to_csv(f'{outter_dir}/all_second.csv', index=False)

In [66]:
# Per region, stack the second half of Preseason Qualifier 1 together with all
# rounds of Qualifiers 2-4 into one file.

regions = ["APAC_North", "APAC_South", "EMEA", "North_America", "South_America"]
pqls = ['Preseason_Qualifier_1','Preseason_Qualifier_2', 'Preseason_Qualifier_3', 'Preseason_Qualifier_4']

for region in regions:
    outer_dir = f'../Outputs/{region}'
    pql_frames = []

    for pql in pqls:
        directory = f'{outer_dir}/{pql}'

        # Qualifier 1 contributes only its second-half file; the others have
        # a single all_rounds file.
        source_name = 'all_second.csv' if pql == 'Preseason_Qualifier_1' else 'all_rounds.csv'

        temp = pd.read_csv(f"{directory}/{source_name}")
        temp['PQL'] = pql
        pql_frames.append(temp)

    all_pqls = pd.concat(pql_frames) if pql_frames else pd.DataFrame()
    all_pqls.to_csv(f'{outer_dir}/all_pqls_noPQ1Rounds.csv', index=False)

In [70]:
# Stack the PQ1 first-half file of every region into one table, tagging each
# region's rows with a Region column.

regions = ["APAC_North", "APAC_South", "EMEA", "North_America", "South_America"]

all_region_pql = pd.concat([
    pd.read_csv(f"../Outputs/{region}/Preseason_Qualifier_1/all_first.csv").assign(Region=region)
    for region in regions
])

all_region_pql.to_csv(f'../Outputs/Combined Region Data/all_region_PQL1Rounds.csv',index=False)

In [71]:
# Stack the PQ1 second-half file of every region into one table, tagging each
# region's rows with a Region column.

regions = ["APAC_North", "APAC_South", "EMEA", "North_America", "South_America"]

all_region_pql = pd.concat([
    pd.read_csv(f"../Outputs/{region}/Preseason_Qualifier_1/all_second.csv").assign(Region=region)
    for region in regions
])

all_region_pql.to_csv(f'../Outputs/Combined Region Data/all_region_PQLs_SecondHalf.csv'.replace('PQLs_SecondHalf', 'PQL1SecondHalf'),index=False)

In [72]:
# Stack every region's all_pqls_noPQ1Rounds.csv into one table. As built by
# the earlier per-region cell, that file holds the PQ1 second half as well as
# PQ2-4, not PQ2-4 alone.
regions = ["APAC_North", "APAC_South", "EMEA", "North_America", "South_America"]
pqls = ['Preseason_Qualifier_2', 'Preseason_Qualifier_3', 'Preseason_Qualifier_4']  # not read in this cell
region_frames = []

for region in regions:
    directory = f'../Outputs/{region}'
    temp = pd.read_csv(f"{directory}/all_pqls_noPQ1Rounds.csv")
    temp['Region'] = region
    region_frames.append(temp)

all_region_pql = pd.concat(region_frames) if region_frames else pd.DataFrame()
all_region_pql.to_csv(f'../Outputs/Combined Region Data/all_region_PQLs_noPQ1.csv',index=False)

# Challenger Circuit

In [78]:
regions = ["APAC_North", "APAC_South", "EMEA", "North_America", "South_America"]
split = ['Split_1','Split_2']
chall_circuit = ['Challenger_Circuit_1', 'Challenger_Circuit_2', 'Challenger_Circuit_3', 'Challenger_Circuit_4']
rounds = ['Round_1', 'Round_2', 'Round_3','Quarterfinals', 'Semifinals', 'Finals']

# Every candidate "Cleaned" folder, in the same order the nested loops would
# visit them, paired with the round name used for the output file.
cleaned_dirs = [
    (r, f'../Outputs/{region}/{s}/{c}/{r}/Cleaned')
    for region in regions
    for s in split
    for c in chall_circuit
    for r in rounds
]

for r, directory in cleaned_dirs:
    try:
        folder_gen(directory)
        final_df = lobby_combiner(directory)
        final_df.to_csv(f'{directory}/Combined/{r}_combined.csv', index=False)
    except FileNotFoundError:
        # Not every split/circuit/round combination has a folder on disk.
        continue

In [83]:
regions = ["APAC_North", "APAC_South", "EMEA", "North_America", "South_America"]
split = ['Split_1','Split_2']
chall_circuit = ['Challenger_Circuit_1', 'Challenger_Circuit_2', 'Challenger_Circuit_3', 'Challenger_Circuit_4']
# NOTE(review): Round_2 and Round_3 are combined per round by the previous
# cell but are not stacked here -- confirm that omission is intended.
rounds = ['Round_1', 'Quarterfinals', 'Semifinals', 'Finals']

# Stack the per-round combined files of each circuit into one file per circuit.
for region in regions:
    for s in split:
        for c in chall_circuit:
            outter_dir = f'../Outputs/{region}/{s}/{c}'
            round_frames = []

            for r in rounds:
                directory = f'{outter_dir}/{r}/Cleaned/Combined'
                try:
                    temp = pd.read_csv(f"{directory}/{r}_combined.csv")
                except (FileNotFoundError, pd.errors.EmptyDataError):
                    # Missing round, or a combined file with nothing to parse
                    # (the per-round cell writes one when a Cleaned folder has
                    # no lobby files). The later split/region cells skip both.
                    continue
                temp['Round'] = f'{r}'
                round_frames.append(temp)

            # Concatenate once; pd.concat rejects an empty list.
            circuit_df = pd.concat(round_frames) if round_frames else pd.DataFrame()
            circuit_df.to_csv(f'{outter_dir}/{c}_combined.csv',index=False)

In [88]:
# Stack the per-circuit files of each split into one file per split.
for region in regions:
    for s in split:
        outter_dir = f'../Outputs/{region}/{s}'
        circuit_frames = []

        for c in chall_circuit:
            directory = f'{outter_dir}/{c}'
            try:
                temp = pd.read_csv(f"{directory}/{c}_combined.csv")
            except (FileNotFoundError, pd.errors.EmptyDataError):
                continue  # circuit missing, or its file has nothing to parse
            temp['Split'] = s
            circuit_frames.append(temp)

        split_df = pd.concat(circuit_frames) if circuit_frames else pd.DataFrame()
        split_df.to_csv(f'{outter_dir}/{s}_combined.csv',index=False)

In [90]:
# Stack the per-split files of each region into one Challenger Circuit file
# per region.
for region in regions:
    outter_dir = f'../Outputs/{region}'
    split_frames = []

    for s in split:
        directory = f'{outter_dir}/{s}'
        try:
            temp = pd.read_csv(f"{directory}/{s}_combined.csv")
        except (FileNotFoundError, pd.errors.EmptyDataError):
            continue  # split missing, or its file has nothing to parse
        temp['Region'] = region
        split_frames.append(temp)

    region_df = pd.concat(split_frames) if split_frames else pd.DataFrame()
    region_df.to_csv(f'{outter_dir}/{region}_cc_combined.csv',index=False)

In [93]:
# Stack every region's Challenger Circuit file into one table. Region is set
# again here, so the column is present whatever the per-region file contains.
region_frames = []

for region in regions:
    directory = f'../Outputs/{region}'
    temp = pd.read_csv(f"{directory}/{region}_cc_combined.csv")
    temp['Region'] = region
    region_frames.append(temp)

all_region_cc = pd.concat(region_frames) if region_frames else pd.DataFrame()
all_region_cc.to_csv(f'../Outputs/Combined Region Data/all_region_cc.csv',index=False)