# Capstone Project - Ryan Wood

The following blocks of code include my import statements and function definitions.

In [1]:
import csv
import datetime
import json
import os
import pprint
import urllib.request

import matplotlib.pyplot as plt
import numpy
import pandas as pd
import pymongo
from sqlalchemy import create_engine, text

In [2]:
# Connection settings for the local MySQL server.
# Each value can be overridden through an environment variable so credentials
# do not have to be edited into the notebook; the fallbacks are the original
# local-development values, so the notebook still runs unchanged.
# FIXME: remove the password fallback before sharing this notebook.
host_name = os.environ.get("CAPSTONE_MYSQL_HOST", "localhost")
host_ip = os.environ.get("CAPSTONE_MYSQL_HOST_IP", "127.0.0.1")
port = os.environ.get("CAPSTONE_MYSQL_PORT", "3306")
user_id = os.environ.get("CAPSTONE_MYSQL_USER", "root2")
pwd = os.environ.get("CAPSTONE_MYSQL_PWD", "123456789")

# Source database holds the raw tables; destination holds the dim/fact tables.
src_dbname = "capstonetables"
dst_dbname = "capstonetables_dw"

In [3]:
def get_dataframe(user_id, pwd, host_name, db_name, sql_query):
    """Run ``sql_query`` against a MySQL database and return the result as a DataFrame.

    Parameters
    ----------
    user_id, pwd : str
        MySQL credentials.
    host_name : str
        Host of the MySQL server.
    db_name : str
        Default database for the connection.
    sql_query : str
        Query passed to ``pd.read_sql``.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_sql`` returns for the query.
    """
    conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}/{db_name}"
    sqlEngine = create_engine(conn_str, pool_recycle=3600)
    try:
        connection = sqlEngine.connect()
        try:
            return pd.read_sql(sql_query, connection)
        finally:
            # Close even when the query fails so the connection is not leaked.
            connection.close()
    finally:
        # A new engine is built on every call; dispose it so its pool does not
        # keep a connection open after the function returns.
        sqlEngine.dispose()


def set_dataframe(user_id, pwd, host_name, db_name, df, table_name, pk_column, db_operation):
    """Write ``df`` to a MySQL table.

    Parameters
    ----------
    user_id, pwd : str
        MySQL credentials.
    host_name : str
        Host of the MySQL server.
    db_name : str
        Database that receives the table.
    df : pandas.DataFrame
        Data to write (the index is not written).
    table_name : str
        Target table.
    pk_column : str
        Column declared as PRIMARY KEY after an ``"insert"``.
    db_operation : {"insert", "update"}
        ``"insert"`` replaces the table and adds the primary key;
        ``"update"`` appends the rows to the existing table.

    Raises
    ------
    ValueError
        If ``db_operation`` is neither ``"insert"`` nor ``"update"``
        (previously such a call silently wrote nothing).
    """
    write_modes = {"insert": "replace", "update": "append"}
    if db_operation not in write_modes:
        raise ValueError(
            f"Unsupported db_operation {db_operation!r}; expected 'insert' or 'update'.")

    conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}/{db_name}"
    sqlEngine = create_engine(conn_str, pool_recycle=3600)
    try:
        # The context manager closes the connection even if to_sql raises.
        with sqlEngine.connect() as connection:
            df.to_sql(table_name, con=connection, index=False,
                      if_exists=write_modes[db_operation])

        if db_operation == "insert":
            # Engine.execute() no longer exists in SQLAlchemy 2.0; run the DDL
            # on an explicit connection, wrapped in text().
            with sqlEngine.begin() as ddl_connection:
                ddl_connection.execute(
                    text(f"ALTER TABLE {table_name} ADD PRIMARY KEY ({pk_column});"))
    finally:
        sqlEngine.dispose()

In [4]:
# create the source database for raw data tables
try:
    conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}"
    sqlEngine = create_engine(conn_str, pool_recycle=3600)

    # Engine.execute() no longer exists in SQLAlchemy 2.0, so the statements
    # run on one explicit connection, wrapped in text().
    with sqlEngine.begin() as connection:
        connection.execute(text(f"DROP DATABASE IF EXISTS `{src_dbname}`;"))
        connection.execute(text(f"CREATE DATABASE `{src_dbname}`;"))
        connection.execute(text(f"USE {src_dbname};"))
except Exception as exc:
    print("There was an error creating a sql engine from the information provided in the program")
    print(f"Details: {exc!r}")
    # Later cells need this database, so stop here instead of continuing
    # with a missing or stale schema.
    raise

In [5]:
# create the destination database for dim/fact tables
conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}"
sqlEngine = create_engine(conn_str, pool_recycle=3600)

# Engine.execute() no longer exists in SQLAlchemy 2.0, so the statements run
# on one explicit connection, wrapped in text().
with sqlEngine.begin() as connection:
    connection.execute(text(f"DROP DATABASE IF EXISTS `{dst_dbname}`;"))
    connection.execute(text(f"CREATE DATABASE `{dst_dbname}`;"))
    connection.execute(text(f"USE {dst_dbname};"))

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x279e58ca5b0>

In [6]:
def get_sql_dataframe(user_id, pwd, host_name, db_name, sql_query):
    """Run ``sql_query`` against a MySQL database and return the result as a DataFrame.

    Parameters
    ----------
    user_id, pwd : str
        MySQL credentials.
    host_name : str
        Host of the MySQL server.
    db_name : str
        Default database for the connection.
    sql_query : str
        Query passed to ``pd.read_sql``.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_sql`` returns for the query.
    """
    conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}/{db_name}"
    sqlEngine = create_engine(conn_str, pool_recycle=3600)
    try:
        conn = sqlEngine.connect()
        try:
            # Query the database and fill a pandas DataFrame with the result.
            return pd.read_sql(sql_query, conn)
        finally:
            # Close even when the query fails so the connection is not leaked.
            conn.close()
    finally:
        # The engine is created per call; dispose it so its pool is released.
        sqlEngine.dispose()


def get_mongo_dataframe(user_id, pwd, host_name, port, db_name, collection, query):
    """Query a MongoDB collection and return the matching documents as a DataFrame.

    Parameters
    ----------
    user_id, pwd : str or None
        Credentials. When either is empty/None the connection is made
        without authentication.
    host_name : str
        MongoDB host.
    port : int or str
        MongoDB port.
    db_name : str
        Database to read from (also placed in the URI path when authenticating).
    collection : str
        Collection name.
    query : dict
        Filter passed to ``Collection.find``.

    Returns
    -------
    pandas.DataFrame
        One row per matching document, without the ``_id`` column.
        Empty when nothing matches.
    """
    from urllib.parse import quote_plus

    if user_id and pwd:
        # The original referenced undefined names (username/password/host) here.
        # Credentials are percent-escaped so characters such as '@' or ':' do
        # not corrupt the URI.
        mongo_uri = 'mongodb://%s:%s@%s:%s/%s' % (
            quote_plus(user_id), quote_plus(pwd), host_name, port, db_name)
    else:
        mongo_uri = f"mongodb://{host_name}:{port}/"

    client = pymongo.MongoClient(mongo_uri)
    try:
        documents = list(client[db_name][collection].find(query))
    finally:
        # Release the client even if the query raises.
        client.close()

    dframe = pd.DataFrame(documents)
    # An empty result has no '_id' column; errors='ignore' avoids a KeyError.
    return dframe.drop(columns=['_id'], errors='ignore')


def set_dataframe(user_id, pwd, host_name, db_name, df, table_name, pk_column, db_operation):
    """Write ``df`` to a MySQL table.

    Parameters
    ----------
    user_id, pwd : str
        MySQL credentials.
    host_name : str
        Host of the MySQL server.
    db_name : str
        Database that receives the table.
    df : pandas.DataFrame
        Data to write (the index is not written).
    table_name : str
        Target table.
    pk_column : str
        Column declared as PRIMARY KEY after an ``"insert"``.
    db_operation : {"insert", "update"}
        ``"insert"`` replaces the table and adds the primary key;
        ``"update"`` appends the rows to the existing table.

    Raises
    ------
    ValueError
        If ``db_operation`` is neither ``"insert"`` nor ``"update"``
        (previously such a call silently wrote nothing).
    """
    if_exists_by_operation = {"insert": "replace", "update": "append"}
    if db_operation not in if_exists_by_operation:
        raise ValueError(
            f"Unsupported db_operation {db_operation!r}; expected 'insert' or 'update'.")

    conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}/{db_name}"
    sqlEngine = create_engine(conn_str, pool_recycle=3600)
    try:
        # Create or append to the table; the context manager closes the
        # connection even if to_sql raises.
        with sqlEngine.connect() as connection:
            df.to_sql(table_name, con=connection, index=False,
                      if_exists=if_exists_by_operation[db_operation])

        if db_operation == "insert":
            # Engine.execute() no longer exists in SQLAlchemy 2.0; run the DDL
            # on an explicit connection, wrapped in text().
            with sqlEngine.begin() as ddl_connection:
                ddl_connection.execute(
                    text(f"ALTER TABLE {table_name} ADD PRIMARY KEY ({pk_column});"))
    finally:
        sqlEngine.dispose()

Next, I load the CSV files into DataFrames and write them to the MySQL source database.

In [7]:
# AttendanceTable file
data_dir = os.path.join(os.getcwd(), 'Rockies Data')
data_file = os.path.join(data_dir, 'AttendanceTable.csv')
try:
    df_attendance = pd.read_csv(data_file)
except Exception as exc:
    print("The file could not be read in correctly.")
    print("Make sure the data file is in the correct directory so it can be inserted properly.")
    print(f"Details: {exc!r}")
    # Re-raise: later cells need df_attendance, and continuing would fail
    # with a NameError or silently reuse a stale frame.
    raise

In [8]:
# Preview the first five attendance rows.
df_attendance.head(n=5)

Unnamed: 0,Date,Opponent,Time,D/N,Attendance,Capacity,Percent Attendance
0,4/5/2019,LAD,3:52,D,48404,50144,96.53
1,4/6/2019,LAD,3:19,N,47880,50144,95.49
2,4/7/2019,LAD,3:52,N,41232,50144,82.23
3,4/8/2019,ATL,2:59,N,25199,50144,50.25
4,4/9/2019,ATL,3:12,N,26124,50144,52.1


In [9]:
# Add a 1-based surrogate key as the first column; it is used as the primary
# key when this frame is written to MySQL.
start_val = 1
# DataFrame.insert raises if the column already exists, so guard the call to
# keep this cell re-runnable.
if 'surr_key' not in df_attendance.columns:
    df_attendance.insert(loc=0, column='surr_key',
                         value=range(start_val, len(df_attendance) + start_val))
df_attendance.head(5)

Unnamed: 0,surr_key,Date,Opponent,Time,D/N,Attendance,Capacity,Percent Attendance
0,1,4/5/2019,LAD,3:52,D,48404,50144,96.53
1,2,4/6/2019,LAD,3:19,N,47880,50144,95.49
2,3,4/7/2019,LAD,3:52,N,41232,50144,82.23
3,4,4/8/2019,ATL,2:59,N,25199,50144,50.25
4,5,4/9/2019,ATL,3:12,N,26124,50144,52.1


In [10]:
# Replace the raw `attendance` table in the source database and key it on surr_key.
set_dataframe(
    user_id, pwd, host_name, src_dbname,
    df=df_attendance,
    table_name="attendance",
    pk_column="surr_key",
    db_operation="insert",
)

In [11]:
# GameResultTable file
data_dir = os.path.join(os.getcwd(), 'Rockies Data')
data_file = os.path.join(data_dir, 'GameResultTable.csv')
try:
    df_gameresults = pd.read_csv(data_file)
except Exception as exc:
    print("The file could not be read in correctly.")
    print("Make sure the data file is in the correct directory so it can be inserted properly.")
    print(f"Details: {exc!r}")
    # Re-raise: later cells need df_gameresults, and continuing would fail
    # with a NameError or silently reuse a stale frame.
    raise

In [12]:
# Add a 1-based surrogate key as the first column; it is used as the primary
# key when this frame is written to MySQL.
start_val = 1
# DataFrame.insert raises if the column already exists, so guard the call to
# keep this cell re-runnable.
if 'surr_key' not in df_gameresults.columns:
    df_gameresults.insert(loc=0, column='surr_key',
                          value=range(start_val, len(df_gameresults) + start_val))
df_gameresults.head(5)

Unnamed: 0,surr_key,Date,Opponent,Result,Runs Scored,Runs Against,Wins,Losses,Time
0,1,4/5/2019,LAD,L,6,10,3,5,3:52
1,2,4/6/2019,LAD,L,2,7,3,6,3:19
2,3,4/7/2019,LAD,L,6,12,3,7,3:52
3,4,4/8/2019,ATL,L,6,8,3,8,2:59
4,5,4/9/2019,ATL,L,1,7,3,9,3:12


In [13]:
# Replace the raw `gameresults` table in the source database and key it on surr_key.
set_dataframe(
    user_id, pwd, host_name, src_dbname,
    df=df_gameresults,
    table_name="gameresults",
    pk_column="surr_key",
    db_operation="insert",
)

<b>To satisfy Benchmark 1:</b> I used data stored in MySQL and MongoDB. I loaded it into DataFrames in this notebook, applied some transformations, and wrote the results back to MySQL as <b>dimension tables</b>.

In [14]:
# Read the raw attendance table back from the source database. The schema
# name comes from src_dbname so it cannot drift from the configuration cell.
sql_attendance = f"SELECT * FROM `{src_dbname}`.`attendance`;"
df_attendance_dim = get_dataframe(user_id, pwd, host_name, src_dbname, sql_attendance)
df_attendance_dim.head(2)

Unnamed: 0,surr_key,Date,Opponent,Time,D/N,Attendance,Capacity,Percent Attendance
0,1,4/5/2019,LAD,3:52,D,48404,50144,96.53
1,2,4/6/2019,LAD,3:19,N,47880,50144,95.49


In [15]:
# Remove the columns not carried into the attendance dimension.
# errors="ignore" keeps the cell re-runnable once the columns are gone.
df_attendance_dim = df_attendance_dim.drop(columns=["Time", "D/N"], errors="ignore")
df_attendance_dim.head(2)

Unnamed: 0,surr_key,Date,Opponent,Attendance,Capacity,Percent Attendance
0,1,4/5/2019,LAD,48404,50144,96.53
1,2,4/6/2019,LAD,47880,50144,95.49


In [16]:
# Read the raw game-results table back from the source database. The schema
# name comes from src_dbname so it cannot drift from the configuration cell.
sql_gameresults = f"SELECT * FROM `{src_dbname}`.`gameresults`;"
df_gameresults_dim = get_dataframe(user_id, pwd, host_name, src_dbname, sql_gameresults)
df_gameresults_dim.head(2)

Unnamed: 0,surr_key,Date,Opponent,Result,Runs Scored,Runs Against,Wins,Losses,Time
0,1,4/5/2019,LAD,L,6,10,3,5,3:52
1,2,4/6/2019,LAD,L,2,7,3,6,3:19


In [17]:
# Remove the columns not carried into the results dimension.
# errors="ignore" keeps the cell re-runnable once the columns are gone.
df_gameresults_dim = df_gameresults_dim.drop(columns=["Wins", "Losses", "Time"], errors="ignore")
df_gameresults_dim.head(2)

Unnamed: 0,surr_key,Date,Opponent,Result,Runs Scored,Runs Against
0,1,4/5/2019,LAD,L,6,10
1,2,4/6/2019,LAD,L,2,7


In [18]:
# Write both dimension frames to the destination (data warehouse) database,
# replacing any existing tables and keying each on its surrogate key.
db_operation = "insert"

tables = [
    ('dim_attendance', df_attendance_dim, 'surr_key'),
    ('dim_results', df_gameresults_dim, 'surr_key'),
]

for table_spec in tables:
    table_name, dataframe, primary_key = table_spec
    set_dataframe(user_id, pwd, host_name, dst_dbname,
                  df=dataframe, table_name=table_name,
                  pk_column=primary_key, db_operation=db_operation)

More function definitions

In [19]:
# def get_sql_dataframe(user_id, pwd, host_name, db_name, sql_query):
#     '''Create a connection to the MySQL database'''
#     conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}/{db_name}"
#     sqlEngine = create_engine(conn_str, pool_recycle=3600)
    
#     '''Invoke the pd.read_sql() function to query the database, and fill a Pandas DataFrame.'''
#     conn = sqlEngine.connect()
#     dframe = pd.read_sql(sql_query, conn);
#     conn.close()
    
#     return dframe


# def get_mongo_dataframe(user_id, pwd, host_name, port, db_name, collection, query):
#     '''Create a connection to MongoDB, with or without authentication credentials'''
#     if user_id and pwd:
#         mongo_uri = 'mongodb://%s:%s@%s:%s/%s' % (username, password, host, port, db_name)
#         client = pymongo.MongoClient(mongo_uri)
#     else:
#         conn_str = f"mongodb://{host_name}:{port}/"
#         client = pymongo.MongoClient(conn_str)
    
#     '''Query MongoDB, and fill a python list with documents to create a DataFrame'''
#     db = client[db_name]
#     dframe = pd.DataFrame(list(db[collection].find(query)))
#     dframe.drop(['_id'], axis=1, inplace=True)
#     client.close()
    
#     return dframe


# def set_dataframe(user_id, pwd, host_name, db_name, df, table_name, pk_column, db_operation):
#     '''Create a connection to the MySQL database'''
#     conn_str = f"mysql+pymysql://{user_id}:{pwd}@{host_name}/{db_name}"
#     sqlEngine = create_engine(conn_str, pool_recycle=3600)
#     connection = sqlEngine.connect()
    
#     '''Invoke the Pandas DataFrame .to_sql( ) function to either create, or append to, a table'''
#     if db_operation == "insert":
#         df.to_sql(table_name, con=connection, index=False, if_exists='replace')
#         sqlEngine.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY ({pk_column});")
            
#     elif db_operation == "update":
#         df.to_sql(table_name, con=connection, index=False, if_exists='append')
    
#     connection.close()

Next, I load my JSON file into MongoDB so that I can read it back into a DataFrame and transform it.

In [20]:
ports = {"mongo" : 27017, "mysql" : 3306}
port = ports["mongo"]
conn_str = f"mongodb://{host_name}:{port}/"
client = pymongo.MongoClient(conn_str)
db = client[src_dbname]

data_dir = os.path.join(os.getcwd(), 'Rockies Data')

# Maps the target MongoDB collection name to the JSON file that fills it.
json_files = {"weather" : 'WeatherTable.json'
             }

for collection_name, file_name in json_files.items():
    json_file = os.path.join(data_dir, file_name)
    with open(json_file, 'r') as openfile:
        json_object = json.load(openfile)

    mongo_collection = db[collection_name]
    # Start from an empty collection so re-running this cell does not load
    # the documents a second time; duplicated weather rows would multiply the
    # rows of the later join on Date.
    mongo_collection.drop()
    result = mongo_collection.insert_many(json_object)
    print(f"{mongo_collection} was successfully loaded.")

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'capstonetables'), 'weather') was successfully loaded.


In [21]:
# Read every document of the `weather` collection (empty filter) into a DataFrame.
query = dict()
port = ports["mongo"]
collection = "weather"

df_mongo = get_mongo_dataframe(
    user_id=None, pwd=None, host_name=host_name, port=port,
    db_name=src_dbname, collection=collection, query=query,
)
df_mongo.head(5)

Unnamed: 0,Date,Avg Temp (F),Low Temp (F),High Temp (F),Wind (mph)
0,3/28/2019,51.08,39.02,73.94,8.95
1,3/29/2019,38.3,37.94,62.96,9.88
2,3/30/2019,32.0,28.04,42.98,7.08
3,3/31/2019,34.16,28.04,39.02,5.59
4,4/1/2019,40.82,28.94,48.92,8.57


In [22]:
# Wind speed is not carried into the weather dimension.
# errors="ignore" keeps the cell re-runnable once the column is gone.
df_mongo = df_mongo.drop(columns="Wind (mph)", errors="ignore")
df_mongo.head(2)

Unnamed: 0,Date,Avg Temp (F),Low Temp (F),High Temp (F)
0,3/28/2019,51.08,39.02,73.94
1,3/29/2019,38.3,37.94,62.96


In [23]:
# Add a 1-based surrogate key as the first column; it is used as the primary
# key when this frame is written to MySQL.
start_val = 1
# DataFrame.insert raises if the column already exists, so guard the call to
# keep this cell re-runnable.
if 'surr_key' not in df_mongo.columns:
    df_mongo.insert(loc=0, column='surr_key',
                    value=range(start_val, len(df_mongo) + start_val))
df_mongo.head(5)

Unnamed: 0,surr_key,Date,Avg Temp (F),Low Temp (F),High Temp (F)
0,1,3/28/2019,51.08,39.02,73.94
1,2,3/29/2019,38.3,37.94,62.96
2,3,3/30/2019,32.0,28.04,42.98
3,4,3/31/2019,34.16,28.04,39.02
4,5,4/1/2019,40.82,28.94,48.92


In [24]:
# Replace `dim_weather` in the destination database and key it on surr_key.
set_dataframe(
    user_id, pwd, host_name, dst_dbname,
    df=df_mongo,
    table_name="dim_weather",
    pk_column="surr_key",
    db_operation="insert",
)

In [25]:
# Close the MongoDB client that was opened to load the JSON file.
client.close()

<b>To satisfy Benchmark 2:</b> I joined the three dimension tables into a fact table. After joining them, I collect <b>streaming data</b> from a weather API and use it in SELECT statements against the fact table to get useful information.

In [26]:
# Builds the fact rows by inner-joining the three dimension tables:
#   - dim_attendance to dim_results on surr_key
#   - dim_attendance to dim_weather on Date
# NOTE(review): the surr_key join assumes both source files list the games in
# the same row order - confirm against the CSVs.
sql_fact_attendance_stats = """
    SELECT
        a.`surr_key`,
        a.`Date`,
        a.`Opponent`,
        r.`Result`,
        r.`Runs Scored`,
        r.`Runs Against`,
        a.`Attendance`,
        a.`Capacity`,
        a.`Percent Attendance`,
        w.`Avg Temp (F)`,
        w.`Low Temp (F)`,
        w.`High Temp (F)`
    FROM `capstonetables_dw`.`dim_attendance` as a
    INNER JOIN `capstonetables_dw`.`dim_results` as r
    ON a.`surr_key` = r.`surr_key`
    INNER JOIN `capstonetables_dw`.`dim_weather` as w
    ON a.Date = w.Date
    ORDER BY a.`surr_key`;
"""

In [27]:
# The query reads only destination-database tables, so connect with
# dst_dbname (the original passed src_dbname and worked only because the
# query uses fully qualified table names).
df_fact_attendance_stats = get_dataframe(user_id, pwd, host_name, dst_dbname, sql_fact_attendance_stats)
df_fact_attendance_stats.head(50)

Unnamed: 0,surr_key,Date,Opponent,Result,Runs Scored,Runs Against,Attendance,Capacity,Percent Attendance,Avg Temp (F),Low Temp (F),High Temp (F)
0,1,4/5/2019,LAD,L,6,10,48404,50144,96.53,53.6,35.96,66.92
1,2,4/6/2019,LAD,L,2,7,47880,50144,95.49,53.42,42.08,71.06
2,3,4/7/2019,LAD,L,6,12,41232,50144,82.23,53.42,44.06,68.0
3,4,4/8/2019,ATL,L,6,8,25199,50144,50.25,57.56,42.08,71.06
4,5,4/9/2019,ATL,L,1,7,26124,50144,52.1,61.16,44.06,75.02
5,6,4/18/2019,PHI,W,6,2,27562,50144,54.97,47.66,39.02,62.06
6,7,4/19/2019,PHI,W,4,3,35423,50144,70.64,53.6,35.96,62.96
7,8,4/20/2019,PHI,L,5,8,40530,50144,80.83,60.8,37.04,71.96
8,9,4/21/2019,PHI,W,4,1,28287,50144,56.41,54.5,48.92,80.96
9,10,4/22/2019,WSN,W,7,5,20517,50144,40.92,44.06,41.0,69.08


In [28]:
# Write the joined result to the destination database as the fact table.
table_name, primary_key, db_operation = "fact_attendance_stats", "surr_key", "insert"

set_dataframe(
    user_id, pwd, host_name, dst_dbname,
    df=df_fact_attendance_stats,
    table_name=table_name,
    pk_column=primary_key,
    db_operation=db_operation,
)

### API Streaming Data

In [29]:
# more import statements
import os
import json
import pprint
import requests
import requests.exceptions
import pandas as pd
import time
from datetime import date
from datetime import datetime

More function definitions

In [30]:
def get_api_response(url, response_type, timeout=30):
    """GET ``url`` and return the JSON payload in the requested form.

    Parameters
    ----------
    url : str
        Endpoint to request.
    response_type : {"json", "dataframe"}
        ``"json"`` returns a pretty-printed JSON string with sorted keys;
        ``"dataframe"`` returns ``pd.json_normalize`` of the payload.
    timeout : float, optional
        Seconds to wait for the server. Without a timeout ``requests`` can
        block indefinitely and the Timeout handler below could never fire.

    Returns
    -------
    str or pandas.DataFrame
        The converted payload. On failure a descriptive error *string* is
        returned instead of raising, and an unrecognised ``response_type``
        yields ``"An unhandled error has occurred!"``.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

    except requests.exceptions.HTTPError as errh:
        return "An Http Error occurred: " + repr(errh)
    except requests.exceptions.ConnectionError as errc:
        return "An Error Connecting to the API occurred: " + repr(errc)
    except requests.exceptions.Timeout as errt:
        return "A Timeout Error occurred: " + repr(errt)
    except requests.exceptions.RequestException as err:
        return "An Unknown Error occurred: " + repr(err)

    if response_type not in ('json', 'dataframe'):
        return "An unhandled error has occurred!"

    try:
        payload = response.json()
    except ValueError as errj:
        # A body that is not valid JSON is reported in the same style as the
        # network errors above instead of escaping as an exception.
        return "An Invalid JSON Error occurred: " + repr(errj)

    if response_type == 'json':
        return json.dumps(payload, sort_keys=True, indent=4)
    return pd.json_normalize(payload)

Get the current date and time so that each row of streaming data can be given a timestamp.

In [31]:
# code adapted and used from https://www.programiz.com/python-programming/datetime/current-datetime
from datetime import datetime

# datetime object containing current date and time
now = datetime.now()

# mm/dd/YYYY HH:MM:SS (month first, 24-hour clock)
dt_string = now.strftime("%m/%d/%Y %H:%M:%S")
print(dt_string)

05/11/2022 23:45:15


Please allow <b>about a minute and a half</b> for this cell to run. It makes five calls to my Weather API, 20 seconds apart, so that there might be some variation in the data I collect.

In [32]:
import requests
import json
import pandas as pd
import time
from datetime import datetime

url = "https://community-open-weather-map.p.rapidapi.com/weather"

querystring = {"q":"Denver,usa","lat":"0","lon":"0","id":"2172797","lang":"null","units":"imperial"}

headers = {
    "X-RapidAPI-Host": "community-open-weather-map.p.rapidapi.com",
    # Prefer the RAPIDAPI_KEY environment variable; the fallback keeps the
    # notebook runnable as before.
    # FIXME: this key is committed in the notebook - rotate it and remove the fallback.
    "X-RapidAPI-Key": os.environ.get(
        "RAPIDAPI_KEY", "9d95859ef4msh1bff28eb9a10263p1e0e90jsnaf12afa0b707"),
}

N_READINGS = 5          # one immediate call plus four delayed ones, as before
POLL_SECONDS = 20       # pause between consecutive calls
REQUEST_TIMEOUT = 30    # seconds; without it a stalled call would hang the cell


def fetch_weather_reading():
    """Fetch one weather observation as a one-row DataFrame stamped with the local time."""
    response = requests.request("GET", url, headers=headers, params=querystring,
                                timeout=REQUEST_TIMEOUT)
    # Fail on an HTTP error here rather than later, when the error payload
    # lacks the expected columns.
    response.raise_for_status()

    reading = pd.json_normalize(json.loads(response.text))
    # mm/dd/YYYY HH:MM:SS timestamp of when the reading was received
    reading.insert(0, 'Date', datetime.now().strftime("%m/%d/%Y %H:%M:%S"))
    return reading


readings = []
for reading_number in range(N_READINGS):
    if reading_number:
        time.sleep(POLL_SECONDS)
    readings.append(fetch_weather_reading())

# DataFrame.append was removed in pandas 2.0; collect the frames and
# concatenate them once.
df = pd.concat(readings, ignore_index=True)

dropcols = ['weather', 'base', 'visibility', 'dt', 'timezone', 'id', 'cod', 'main.pressure', 'main.humidity',
            'wind.deg', 'wind.gust', 'clouds.all', 'sys.type', 'sys.id', 'sys.sunrise','sys.sunset',
            'main.feels_like', 'sys.country']
# errors='ignore': a response may omit some of these fields, and a missing
# column would otherwise raise KeyError.
df = df.drop(columns=dropcols, errors='ignore')

rename_cols = {"name":"City", "coord.lon":"Longitude", "coord.lat":"Latitude", "main.temp":"Temp",
               "main.temp_min":"Low Temp", "main.temp_max":"High Temp", "wind.speed":"Wind Speed"}
df = df.rename(columns=rename_cols)

In [33]:
# Display the collected weather readings.
df

Unnamed: 0,Date,City,Longitude,Latitude,Temp,Low Temp,High Temp,Wind Speed
0,05/11/2022 23:45:16,Denver,-104.9847,39.7392,69.35,63.95,73.58,5.01
1,05/11/2022 23:45:36,Denver,-104.9847,39.7392,69.22,63.95,73.58,5.01
2,05/11/2022 23:45:56,Denver,-104.9847,39.7392,69.22,63.95,73.58,5.01
3,05/11/2022 23:46:17,Denver,-104.9847,39.7392,69.22,63.95,73.58,5.01
4,05/11/2022 23:46:37,Denver,-104.9847,39.7392,69.35,63.95,73.58,5.01


In [34]:
# Add a 1-based surrogate key as the first column; it is used as the primary
# key when this frame is written to MySQL.
start_val = 1
# DataFrame.insert raises if the column already exists, so guard the call to
# keep this cell re-runnable.
if 'surr_key' not in df.columns:
    df.insert(loc=0, column='surr_key', value=range(start_val, len(df) + start_val))
df.head(5)

Unnamed: 0,surr_key,Date,City,Longitude,Latitude,Temp,Low Temp,High Temp,Wind Speed
0,1,05/11/2022 23:45:16,Denver,-104.9847,39.7392,69.35,63.95,73.58,5.01
1,2,05/11/2022 23:45:36,Denver,-104.9847,39.7392,69.22,63.95,73.58,5.01
2,3,05/11/2022 23:45:56,Denver,-104.9847,39.7392,69.22,63.95,73.58,5.01
3,4,05/11/2022 23:46:17,Denver,-104.9847,39.7392,69.22,63.95,73.58,5.01
4,5,05/11/2022 23:46:37,Denver,-104.9847,39.7392,69.35,63.95,73.58,5.01


In [35]:
# Replace `stream_data` in the destination database and key it on surr_key.
set_dataframe(
    user_id, pwd, host_name, dst_dbname,
    df=df,
    table_name="stream_data",
    pk_column="surr_key",
    db_operation="insert",
)

In [36]:
# Average of the Temp column over all rows of stream_data.
# The aggregate has no alias, so the result column is named "AVG(Temp)".
sql_average_temp = """
    SELECT AVG(Temp)
    FROM `capstonetables_dw`.`stream_data`;
"""

In [37]:
# Use dst_dbname instead of repeating the database name as a literal, so the
# call stays in sync with the configuration cell.
df_average_temp = get_dataframe(user_id, pwd, host_name, dst_dbname, sql_average_temp)
df_average_temp.head()

Unnamed: 0,AVG(Temp)
0,69.272


In [38]:
# Take the single aggregate out of the one-row query result.
s = df_average_temp.loc[:, 'AVG(Temp)']
value = s.loc[0]

In [39]:
# Show the average streamed temperature that the next query is built around.
print(value)

69.272


In [40]:
# Select the fact rows whose average temperature lies strictly within
# 5 degrees of the streamed average (`value`).
sql_games_within_range = f"""
    SELECT * FROM capstonetables_dw.fact_attendance_stats
    WHERE `capstonetables_dw`.`fact_attendance_stats`.`Avg Temp (F)` <{value} + 5
    AND `capstonetables_dw`.`fact_attendance_stats`.`Avg Temp (F)` > {value} - 5;
"""

In [41]:
# Use dst_dbname instead of repeating the database name as a literal, so the
# call stays in sync with the configuration cell.
df_games_within_range = get_dataframe(user_id, pwd, host_name, dst_dbname, sql_games_within_range)
df_games_within_range.head(50)

Unnamed: 0,surr_key,Date,Opponent,Result,Runs Scored,Runs Against,Attendance,Capacity,Percent Attendance,Avg Temp (F),Low Temp (F),High Temp (F)
0,30,6/2/2019,TOR,W,5,1,37861,50144,75.5,64.76,48.02,75.02
1,32,6/11/2019,CHC,W,10,3,43126,50144,86.0,65.12,46.04,80.96
2,34,6/13/2019,SDP,W,9,6,35504,50144,70.8,68.54,51.98,73.94
3,35,6/14/2019,SDP,L,12,16,38077,50144,75.94,71.6,53.06,87.08
4,36,6/15/2019,SDP,W,14,8,46133,50144,92.0,66.92,53.96,84.92
5,37,6/16/2019,SDP,L,13,14,47526,50144,94.78,67.1,53.06,78.98
6,42,7/2/2019,HOU,L,8,9,47864,50144,95.45,73.94,57.02,87.08
7,58,8/13/2019,ARI,L,3,9,31815,50144,63.45,73.4,60.08,89.96
8,59,8/14/2019,ARI,W,7,6,32247,50144,64.31,73.4,59.0,89.96
9,63,8/26/2019,ATL,W,3,1,29803,50144,59.43,73.94,59.0,96.08


In [42]:
# Mean percent attendance of the selected games, formatted to two decimals.
float_percent = df_games_within_range.loc[:, "Percent Attendance"].mean()
avg_percent_attendance = f"{float_percent:.2f}"

In [43]:
# Report the headline figure in one sentence.
summary_sentence = " ".join([
    "The average percent attendance of games with a mean temperature within 5 degrees of the current temperature",
    "in Denver today is",
    avg_percent_attendance,
    "%",
])
print(summary_sentence)

The average percent attendance of games with a mean temperature within 5 degrees of the current temperature in Denver today is 74.04 %


In [44]:
# Write the spreadsheet to the notebook's working directory (the same base
# the input files are read from) instead of an absolute, user-specific
# Windows path that exists on one machine only.
output_file = os.path.join(os.getcwd(), 'games_within_temp_range.xlsx')
df_games_within_range.to_excel(output_file, index=False, header=True)

<b>To satisfy Benchmarks 3-8:</b> For Benchmarks 3-6, all of my code and files are in the GitHub repository I will submit to Collab. For Benchmark 7, I have a JPG file that shows the data visualization I created. For Benchmark 8, all of my code is in the same GitHub repository.