In [24]:
#imports
import boto3
from botocore.exceptions import ClientError #for debugging
from dotenv import load_dotenv #for env variables
import os
import pandas as pd

In [25]:
# Read the .env file so the credentials it holds become visible as environment variables.
load_dotenv()

# AWS credentials for the S3 client; each is None when its variable is not set.
access_key_id = os.environ.get('ACCESS_KEY_ID')
secret_key = os.environ.get('SECRET_ACCESS_KEY')

# S3 bucket to read from, and the local folder that receives the downloaded files.
bucket_name = "mindex-data-analytics-code-challenge"
data_folder = "/Users/mady/mindex_data_analytics_challenge/data/"

# Debug aid only: uncomment to confirm the keys were loaded, and clear the cell
# output afterwards so the secrets are never saved with the notebook.
#print('access_key_id: ', access_key_id)
#print('secret_key: ', secret_key)


In [None]:

# Build the S3 client from the credentials loaded from the .env file.
s3 = boto3.client(
    's3',
    aws_access_key_id=access_key_id,
    aws_secret_access_key=secret_key,
    region_name='us-east-1',
)

# Object keys to fetch from the bucket; each one is saved locally under the same name.
files = ['bengals.csv', 'boyd_receiving.csv', 'chase_receiving.csv', 'higgins_receiving.csv']

# The download needs an existing target directory, so create it on a fresh checkout.
os.makedirs(data_folder, exist_ok=True)

# Download each file. A failure is reported and the loop moves on to the next
# file, so one missing object does not block the others.
for file in files:
    try:
        s3.download_file(bucket_name, file, os.path.join(data_folder, file))
        print(file + " downloaded successfully")
    except ClientError as e:
        # Error returned by the S3 service (e.g. missing key, access denied):
        # surface the service error code next to the message.
        error_code = e.response.get('Error', {}).get('Code', 'Unknown')
        print(f"{file} error ({error_code}): {e}")
    except Exception as e:
        # Anything else (missing credentials, local filesystem problems, ...).
        print(f"{file} error: {e}")

 

bengals.csv downloaded successfully
boyd_receiving.csv downloaded successfully
chase_receiving.csv downloaded successfully
higgins_receiving.csv downloaded successfully


In [54]:
# Use the pandas library to load each CSV into its own dataframe.

# Maps a short name (e.g. 'Boyd') to the dataframe loaded for it.
dataframes = {}

# Receiver files, located in the same folder the download cell wrote to.
player_files = [
    os.path.join(data_folder, 'boyd_receiving.csv'),
    os.path.join(data_folder, 'chase_receiving.csv'),
    os.path.join(data_folder, 'higgins_receiving.csv'),
]

# Load each receiver file into its own dataframe.
for file in player_files:
    # 'boyd_receiving.csv' -> 'Boyd'
    player_name = os.path.basename(file).split('_')[0].capitalize()
    # Tag every row with the player so rows stay identifiable once the
    # receiver frames are combined.
    dataframes[player_name] = pd.read_csv(file).assign(Player=player_name)
    # Report only after the read succeeded.
    print(player_name + " dataframe created")

# Spot-check the first rows of each receiver frame, each under its own label.
for player_name in ('Boyd', 'Higgins', 'Chase'):
    print('****** ' + player_name + ' df ****** \n ', dataframes[player_name].head())

# Load the team schedule/results.
# NOTE: the key is misspelled ('Bangals'), but the merge cell further down
# reads exactly this key, so it is kept unchanged here.
dataframes['Bangals'] = pd.read_csv(os.path.join(data_folder, 'bengals.csv'))
print('****** Bengals df ****** \n ', dataframes['Bangals'].head())



Boyd dataframe created
Chase dataframe created
Higgins dataframe created
****** Boyd df ****** 
     Week  Yards  TD Player
0  REG1     32   0   Boyd
1  REG2     73   0   Boyd
2  REG3     36   1   Boyd
3  REG4    118   0   Boyd
4  REG5     24   0   Boyd
****** Higgins df ****** 
     Week  Yards  TD   Player
0  REG1     58   1  Higgins
1  REG2     60   1  Higgins
2  REG5     32   0  Higgins
3  REG6     44   0  Higgins
4  REG7     62   0  Higgins
****** Higgins df ****** 
     Week  Yards  TD Player
0  REG1    101   1  Chase
1  REG2     54   1  Chase
2  REG3     65   2  Chase
3  REG4     77   0  Chase
4  REG5    159   1  Chase
****** Bengals df ****** 
     Week Opponent Location  Result
0  PRE1       TB     Away     1.0
1  PRE2      WSH     Away     0.0
2  PRE3      MIA     Home     0.0
3  REG1      MIN     Home     1.0
4  REG2      CHI     Away     0.0


In [60]:
# Join/Merge all of the dataframes together to display one global table that shows
# the three different receivers' yards and touchdown (TD) data as well as every
# game result. Be sure to include Opponent, Location, and Result fields from the
# bengals.csv file.
#
# Replace the '1.0' or '0.0' values in the Result field to display 'Win' or 'Loss',
# respectively.

# Stack the three receiver frames; ignore_index avoids repeated row labels
# in the combined frame.
receivers = pd.concat(
    [dataframes['Boyd'], dataframes['Higgins'], dataframes['Chase']],
    ignore_index=True,
)

# Attach Opponent/Location/Result to every receiver row by week, then turn the
# numeric Result into a label (values other than 1.0 / 0.0 are left as they are).
# validate='many_to_one' raises a MergeError if a Week appears more than once in
# the schedule, instead of silently duplicating receiver rows.
# NOTE(review): an inner join keeps only weeks that have at least one receiver
# row; games with no receiver data are dropped. Confirm this is the intended
# reading of "every game result" before changing the join type.
df = (
    receivers
    .merge(dataframes['Bangals'], on='Week', how='inner', validate='many_to_one')
    .replace({'Result': {1.0: 'Win', 0.0: 'Loss'}})
)

# Save the global dataframe next to the downloaded inputs.
df.to_csv(os.path.join(data_folder, 'global.csv'), index=False)

# Display a preview of the global table as the cell output.
df.head(10)
