### SECTION 1: ENVIRONMENT SETUP

In [1]:
# Install necessary libraries
!pip install pymysql
!pip install mysql-connector-python
!pip install sqlalchemy
!pip install pymongo
!pip install pandas
!pip install matplotlib
!pip install schedule



In [2]:
# Import libraries
import datetime
import json
import os
import random
import string
import time
import warnings
from urllib.parse import quote_plus

import matplotlib.pyplot as plt
import mysql.connector
import pandas as pd
import pymongo
import pymysql
import requests
import schedule
from sqlalchemy import create_engine

warnings.filterwarnings('ignore')

### SECTION 2: CONNECT TO THE DATA_PROJECT_TWO DATABASE

In [3]:
# Database connection configuration.
# Every setting can be overridden through an environment variable so that
# credentials do not have to be edited into (or committed with) the notebook.
# NOTE(review): the fallback password is the original development default and is
# kept only so existing runs behave the same; set MYSQL_PASSWORD and remove the
# literal before sharing this notebook.
host = os.environ.get("MYSQL_HOST", "localhost")
user = os.environ.get("MYSQL_USER", "root")
password = os.environ.get("MYSQL_PASSWORD", "ds2002spring2024")
main_database = os.environ.get("MYSQL_DATABASE", "data_project_two")

# Create an SQLAlchemy engine for the data_project_two database.
# The user name and password are percent-encoded because characters such as
# "@", ":" or "/" would otherwise be parsed as part of the URL structure.
data_project_two_engine = create_engine(
    f'mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}@{host}/{main_database}'
)

# Bind the name before the attempt so that a failed connection leaves an explicit
# None behind instead of an undefined name (or a stale object from an earlier run).
data_project_two_connection = None
try:
    # Connect to the data_project_two database (pymysql takes the raw, unencoded values)
    data_project_two_connection = pymysql.connect(host=host, user=user, password=password, database=main_database)
    print("Connection to the data_project_two database was successful!")
except Exception as e:
    print(f"An error occured while connecting to the data_project_two database: {e}")

Connection to the data_project_two database was successful!


### SECTION 3: EXTRACT DATA FROM THE REMOTE DATA API AND LOAD DATA INTO THE DATA_PROJECT_TWO DATABASE

In [4]:
def extract_and_load_data(api_url="https://4feaquhyai.execute-api.us-east-1.amazonaws.com/api/pi", timeout=10):
    """Fetch one record from the remote API and append it to the ``pi_data`` table.

    The 'factor' and 'pi' fields of the JSON response are stored together with
    the current local time truncated to the start of the minute.

    Parameters:
        api_url: Endpoint to send the GET request to. Defaults to the endpoint
            this notebook was written against.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        None. Failures (non-200 status, network errors, missing fields,
        database errors) are reported with print() and never raised, so a
        single bad call cannot stop the scheduling loop.
    """
    try:
        # Without a timeout a stalled connection would block the caller
        # indefinitely, and every later scheduled run would be missed.
        response = requests.get(api_url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve data from API: Status code {response.status_code}")
            return

        # Parse the JSON body into a dictionary
        api_pi_data = response.json()

        # Stamp the record with the minute it was collected in
        start_of_minute = datetime.datetime.now().replace(second=0, microsecond=0)

        # Build a one-row DataFrame holding only the columns that are stored
        api_pi_data_df = pd.DataFrame([{
            'factor': api_pi_data['factor'],
            'pi': api_pi_data['pi'],
            'time': start_of_minute,
        }])

        # Load pi data into database
        api_pi_data_df.to_sql('pi_data', data_project_two_engine, if_exists='append', index=False)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

### SECTION 4: QUERY DATA IN THE DATA_PROJECT_TWO DATABASE TO VERIFY CONSISTENT CODE EXECUTION

In [5]:
def query_database_contents(connection):
    """Return every row of the ``pi_data`` table as a DataFrame.

    Parameters:
        connection: Database connection handed to ``pd.read_sql``.

    Returns:
        A DataFrame with the query result, or an empty DataFrame if the query
        raised; in that case the error is reported with print().
    """
    select_all_rows = "SELECT * FROM pi_data"
    try:
        table_snapshot = pd.read_sql(select_all_rows, connection)
    except Exception as e:
        print(f"Failed to fetch or display database contents: {e}")
        # Callers always receive a DataFrame, even on failure
        table_snapshot = pd.DataFrame()
    return table_snapshot

def data_query_connection():
    """Open a new pymysql connection to the data_project_two database for querying.

    Uses the module-level ``host``, ``user``, ``password`` and ``main_database``
    settings.

    Returns:
        The open connection, or None if connecting failed. The caller is
        responsible for closing the connection it receives.
    """
    try:
        return pymysql.connect(host=host, user=user, password=password, database=main_database)
    except Exception as e:
        # Report the reason instead of returning None silently, so a failed
        # query further down can be traced back to the connection problem.
        print(f"Failed to connect to the data_project_two database for querying: {e}")
        return None

def format_output(minute, dataframe):
    """Print a labelled snapshot of the database table.

    Parameters:
        minute: Counter shown in the heading, zero-padded to two digits.
        dataframe: Table contents to print beneath the heading.

    The snapshot is followed by one blank line to separate successive outputs.
    """
    heading = f"Database Table at Minute {minute:02}:"
    # One call emits the heading, the table and the trailing blank line
    print(heading, dataframe, "", sep="\n")

### SECTION 5: SCHEDULE THE EXTRACTION, LOADING, AND DISPLAY OF THE DATA IN THE DATA_PROJECT_TWO DATABASE

In [6]:
# Initialize a minute counter to output database changes properly (from Minute 00 to Minute 59)
minute_counter = 0

def process():
    """Run one extract-load-display cycle and advance the minute counter.

    Fetches a record from the API into the database, reads the table back
    through a fresh query connection, prints it under the current minute
    label, and increments ``minute_counter``.

    The function itself does not check the clock; the scheduler registered
    below decides when it runs.
    """
    global minute_counter
    # Extract data from the API and load it into the database
    extract_and_load_data()
    # Fetch the current contents of the database to verify consistent code execution
    connection = data_query_connection()
    try:
        database_contents = query_database_contents(connection)
    finally:
        # A new connection is opened on every run; close it so an hour-long run
        # does not leave one idle connection per minute open on the server.
        if connection is not None:
            connection.close()
    format_output(minute_counter, database_contents)
    minute_counter += 1

# Run the process once per minute, at second :00.
# Registering a once-per-second job and testing `second == 0` inside it is
# fragile: each loop iteration lasts slightly longer than one second, so the
# sampled wall-clock second drifts and can jump over 0, skipping a minute.
process_job = schedule.every().minute.at(":00").do(process)

# Main loop to manage and execute the scheduled process
try:
    # Determine the current time to establish the desired one hour run duration
    start_time = datetime.datetime.now()
    end_time = start_time + datetime.timedelta(hours=1)
    # Execute the process for the specified duration
    while datetime.datetime.now() < end_time:
        schedule.run_pending()
        time.sleep(1)
except KeyboardInterrupt:
    print("The user interrupted the scheduling.")
except Exception as e:
    print(f"An error occurred during scheduling: {e}")
finally:
    # Jobs are stored on the schedule module's default scheduler, which outlives
    # this cell; cancel ours so re-running the cell does not stack duplicates.
    schedule.cancel_job(process_job)

Database Table at Minute 00:
   factor        pi                time
0  148877  3.141599 2024-04-16 20:53:00

Database Table at Minute 01:
   factor        pi                time
0  148877  3.141599 2024-04-16 20:53:00
1  157464  3.141586 2024-04-16 20:54:00

Database Table at Minute 02:
   factor        pi                time
0  148877  3.141599 2024-04-16 20:53:00
1  157464  3.141586 2024-04-16 20:54:00
2  166375  3.141599 2024-04-16 20:55:00

Database Table at Minute 03:
   factor        pi                time
0  148877  3.141599 2024-04-16 20:53:00
1  157464  3.141586 2024-04-16 20:54:00
2  166375  3.141599 2024-04-16 20:55:00
3  175616  3.141587 2024-04-16 20:56:00

Database Table at Minute 04:
   factor        pi                time
0  148877  3.141599 2024-04-16 20:53:00
1  157464  3.141586 2024-04-16 20:54:00
2  166375  3.141599 2024-04-16 20:55:00
3  175616  3.141587 2024-04-16 20:56:00
4  185193  3.141598 2024-04-16 20:57:00

Database Table at Minute 05:
   factor        pi  