# <u>Bengals Data ETL</u>

### Establish Package Imports

In [49]:
import boto3
import pandas as pd
import io

### Create Connection Class for AWS S3 Bucket

Notes:
- AWS Credentials were set up with Environment Variables

Implementation:
There were many approaches I felt I could take with this. I decided to implement this with the intent that we might want to add additional teams later by adding more CSVs. I also broke it up into different classes to help break down my thought process and hopefully make it easier to follow.

In [50]:
class Data_Connection(object):
    """Small wrapper around a boto3 S3 client that loads bucket objects as DataFrames.

    The boto3 session is created without explicit credentials, so boto3
    resolves them on its own (the notebook sets them via environment
    variables).

    Parameters
    ----------
    bucket : str, optional
        Name of the S3 bucket to read from. Defaults to the code-challenge
        bucket so existing ``Data_Connection()`` calls keep working.
    """

    def __init__(self, bucket='mindex-data-analytics-code-challenge'):
        self.aws_connection = boto3.Session()
        self.s3_connection = self.aws_connection.client('s3')
        self.aws_bucket = bucket

    def check_bucket_connection(self):
        """Probe the bucket and print whether it could be listed.

        This is a best-effort check: any exception is printed rather than
        raised, and nothing is returned.
        """
        print("Configurating Connection to S3 Bucket...")
        try:
            # One key is enough to prove the bucket is reachable and listable.
            self.s3_connection.list_objects(Bucket=self.aws_bucket, MaxKeys=1)
            print("Connection Established!")
        except Exception as e:
            print(e)

    def bucket_extract(self):
        """Read every object in the bucket with ``pandas.read_csv``.

        Returns
        -------
        dict
            Maps each source name (the object key up to its first ``.``) to
            ``{'data': DataFrame, 'identification': str}``. The identification
            is ``'player_data'`` when the source name contains ``"receiving"``
            and ``'team_data'`` otherwise. An empty bucket yields ``{}``.
        """
        data_sources = {}

        print('Extracting Sources...')

        # A single list call returns at most 1000 keys; the paginator walks all pages.
        paginator = self.s3_connection.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=self.aws_bucket):
            # "Contents" is absent when a page (or the whole bucket) has no objects.
            for source in page.get('Contents') or []:
                key = source['Key']

                # Keys ending in "/" are folder placeholders with no CSV payload.
                if key.endswith('/'):
                    continue

                s3_obj = self.s3_connection.get_object(Bucket=self.aws_bucket, Key=key)
                nfl_df = pd.read_csv(io.BytesIO(s3_obj['Body'].read()))

                # Hold each DataFrame in a dictionary for easier analysis
                source_name = key.split('.')[0]
                if "receiving" in source_name:
                    identification = 'player_data'
                else:
                    identification = 'team_data'

                data_sources[source_name] = {
                    'data': nfl_df,
                    'identification': identification,
                }

        print('Extraction Completed!')

        return data_sources

In [51]:
# Build the S3 helper, then confirm the bucket can be listed before extracting anything.
connection = Data_Connection()
connection.check_bucket_connection()

Configurating Connection to S3 Bucket...
Connection Established!


In [52]:
# Load the bucket's objects into a dict of DataFrames keyed by source name.
nfl_dataframes = connection.bucket_extract()

Extracting Sources...
Extraction Completed!


In [55]:
class Data_Transformation(object):
    """Transform extracted player/team DataFrames and join them together.

    Parameters
    ----------
    data_dict : dict
        Maps a source name to ``{'data': DataFrame, 'identification': str}``,
        where identification is ``'player_data'`` or ``'team_data'``.

    Attributes are populated by the methods below:
    ``players`` and ``team`` by ``transform_data``, ``merged`` by ``merge_data``.
    """

    def __init__(self, data_dict):
        self.trans_data = data_dict
        self.players = None
        self.team = None
        self.merged = None

    def transform_data(self):
        """Tag player frames with the player name and label team results.

        Player sources get a ``player_name`` column taken from the part of the
        source name before the first ``_`` and are stacked into ``self.players``.
        The team source has ``Result`` values 1/0 replaced by ``'Win'``/``'Loss'``
        and is stored in ``self.team``. Sources with any other identification
        are ignored.

        The input DataFrames are not modified; work is done on copies so the
        method can be re-run safely. ``self.players`` stays ``None`` when no
        player source is present.
        """
        temp_player_dfs = []
        for key, source in self.trans_data.items():
            identification = source['identification']

            # Check if player data: execute transformations
            if identification == 'player_data':
                # assign() returns a new frame, leaving the extracted data untouched.
                df = source['data'].assign(player_name=key.split('_')[0])
                temp_player_dfs.append(df)
                print(df)
            # Else if it is team data: execute transformations
            elif identification == 'team_data':
                df = source['data'].copy()
                # Assign the result back instead of calling replace(inplace=True)
                # on a column selection, which does not update the frame under
                # pandas Copy-on-Write.
                df['Result'] = df['Result'].replace({1: 'Win', 0: 'Loss'})
                self.team = df
                print(self.team)

        # pd.concat raises on an empty list, so only stack when there is data.
        if temp_player_dfs:
            self.players = pd.concat(temp_player_dfs, ignore_index=True)
        else:
            self.players = None

    def merge_data(self):
        """Left-join team rows onto player rows and print the result.

        The join uses the columns the two frames have in common (pandas'
        default when no key is given). The merged frame is also kept on
        ``self.merged`` so it can be used after the call.

        Raises
        ------
        ValueError
            If ``transform_data`` has not produced both player and team data.
        """
        if self.players is None or self.team is None:
            raise ValueError(
                "merge_data requires both player and team data; "
                "run transform_data() on sources containing both first."
            )
        merged_df = pd.merge(self.players, self.team, how='left')
        self.merged = merged_df
        print(merged_df)
        

In [56]:
# Run the transform step on the extracted sources, then left-join team results onto player rows.
trans = Data_Transformation(nfl_dataframes)
trans.transform_data()
trans.merge_data()

     Week Opponent Location Result
0    PRE1       TB     Away    Win
1    PRE2      WSH     Away   Loss
2    PRE3      MIA     Home   Loss
3    REG1      MIN     Home    Win
4    REG2      CHI     Away   Loss
5    REG3      PIT     Away    Win
6    REG4      JAX     Home    Win
7    REG5       GB     Home   Loss
8    REG6      DET     Away    Win
9    REG7      BAL     Away    Win
10   REG8      NYJ     Away   Loss
11   REG9      CLE     Home   Loss
12  REG10      NaN      NaN    NaN
13  REG11       LV     Away    Win
14  REG12      PIT     Home    Win
15  REG13      LAC     Home   Loss
16  REG14       SF     Home   Loss
17  REG15      DEN     Away    Win
18  REG16      BAL     Home    Win
19  REG17       KC     Home    Win
20  REG18      CLE     Away   Loss
21  POST1       LV     Home    Win
22  POST2      TEN     Away    Win
23  POST3       KC     Away    Win
24  POST4      LAR  Neutral   Loss
     Week  Yards  TD player_name
0    REG1     32   0        boyd
1    REG2     73   0    