In [38]:
import numpy as np
import pandas as pd
import matplotlib as plt
import seaborn as sns
import requests
import os
import pyodbc
from datetime import datetime
from sqlalchemy import create_engine, exc
from dotenv import load_dotenv

load_dotenv()

True

In [39]:
# Query window (YYYY-MM-DD strings) and ticker that are passed to polygon() further down
start_date = '2022-09-01'
end_date = '2024-06-30'
index = 'I:NDX'

### Gather stock data from Polygon.io and clean it in a pandas DataFrame

* Convert date from unix timestamp to datetime object
* Rename Columns     

In [40]:

def polygon(index_ticker, date_start, date_end):
    """Fetch monthly OHLC bars for a ticker from the Polygon aggregates endpoint.

    Parameters
    ----------
    index_ticker : str
        Ticker symbol placed in the request path, e.g. ``'I:NDX'``.
    date_start, date_end : str
        Range bounds placed in the request path, e.g. ``'2022-09-01'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``open``, ``high``, ``low``, ``close``. ``date`` is a
        datetime with the time of day stripped; prices are rounded to 2
        decimals. Rows are requested in ascending order (``sort=asc``).

    Raises
    ------
    KeyError
        If the ``polygon_api_key`` environment variable is not set.
    requests.HTTPError
        If the API responds with a 4xx/5xx status.
    ValueError
        If the response payload contains no ``results``.
    """
    polygon_url = 'https://api.polygon.io/v2/aggs/ticker'
    full_url = f'{polygon_url}/{index_ticker}/range/1/month/{date_start}/{date_end}'

    # The key is read from the environment so it never appears in the notebook
    params = {
        'sort': 'asc',
        'apiKey': os.environ['polygon_api_key'],
    }

    # A timeout keeps a stalled connection from hanging the kernel; HTTP errors
    # (bad key, rate limit, ...) are raised here rather than failing later
    response = requests.get(full_url, params=params, timeout=30)
    response.raise_for_status()
    polygon_data = response.json()

    # Fail with a readable message instead of an opaque KeyError downstream
    results = polygon_data.get('results')
    if not results:
        raise ValueError(
            f"Polygon returned no results for {index_ticker} "
            f"({date_start} to {date_end}); "
            f"status={polygon_data.get('status')!r}, "
            f"error={polygon_data.get('error') or polygon_data.get('message')!r}"
        )

    price_cols = ['open', 'high', 'low', 'close']
    df_polygon = pd.DataFrame(results).rename(
        columns={'o': 'open', 'c': 'close', 'h': 'high', 'l': 'low'}
    )

    # 't' is a Unix timestamp in milliseconds; normalize() drops the time of
    # day, matching the original format-to-YYYY-MM-DD-and-reparse round trip
    df_polygon['date'] = pd.to_datetime(df_polygon['t'], unit='ms').dt.normalize()

    # .copy() so the rounding below writes to an independent frame, not a view
    df_polygon = df_polygon[['date'] + price_cols].copy()

    # Round to 2 decimal pts
    df_polygon[price_cols] = df_polygon[price_cols].round(2)

    return df_polygon


In [41]:
# Fetch the monthly bars for the configured ticker and date window
df_1 = polygon(index, start_date, end_date)

# Bare last expression so the notebook renders the frame
df_1


Unnamed: 0,date,open,high,low,close
0,2023-02-01,12085.67,12159.64,11900.84,12042.12
1,2023-03-01,12026.72,13188.83,11695.41,13181.35
2,2023-04-01,13084.1,13247.39,12724.24,13245.99
3,2023-05-01,13229.11,14520.17,12938.45,14254.09
4,2023-06-01,14269.09,15284.65,14219.94,15179.21
5,2023-07-01,15190.54,15932.05,14924.64,15757.0
6,2023-08-01,15696.98,15748.43,14557.83,15501.07
7,2023-09-01,15600.95,15618.85,14432.6,14715.24
8,2023-10-01,14717.9,15333.98,14058.33,14409.78
9,2023-11-01,14453.68,16161.82,14441.89,15947.87


## Data Validation
- Summary statistics
- Comparing to another source (Alpha Vantage)

### Check for Null and NaN values through summary statistics

In [42]:
# Check for nulls
def null_check(df):
    """Return the number of missing (null/NaN) entries in each column of ``df``.

    The result is a Series indexed by column name.
    """
    missing_mask = df.isna()
    return missing_mask.sum()

In [43]:
# NOTE(review): this issues the same API request as the earlier fetch cell;
# the df_1 produced there could be reused instead of calling polygon() again
df_1 = polygon(index, start_date, end_date)


# Print the per-column count of missing values
print(null_check(df_1))



date     0
open     0
high     0
low      0
close    0
dtype: int64



### Compare the dataframes obtained
* If the data matches, load it to our SQL database
* If some of the data doesn't match, replace it with the average of both dfs, then store in SQL database

In [44]:
# Add a constant symbol column; it fills the symbol field of the SQL insert below
df_1['symbol'] = 'NASDAQ'
df_1

Unnamed: 0,date,open,high,low,close,symbol
0,2023-02-01,12085.67,12159.64,11900.84,12042.12,NASDAQ
1,2023-03-01,12026.72,13188.83,11695.41,13181.35,NASDAQ
2,2023-04-01,13084.1,13247.39,12724.24,13245.99,NASDAQ
3,2023-05-01,13229.11,14520.17,12938.45,14254.09,NASDAQ
4,2023-06-01,14269.09,15284.65,14219.94,15179.21,NASDAQ
5,2023-07-01,15190.54,15932.05,14924.64,15757.0,NASDAQ
6,2023-08-01,15696.98,15748.43,14557.83,15501.07,NASDAQ
7,2023-09-01,15600.95,15618.85,14432.6,14715.24,NASDAQ
8,2023-10-01,14717.9,15333.98,14058.33,14409.78,NASDAQ
9,2023-11-01,14453.68,16161.82,14441.89,15947.87,NASDAQ


In [45]:
# Define the connection string for Windows Authentication
server = 'MoemenLaptop'
database = 'InvestmentPortfolio'
connection_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server};DATABASE={database};Trusted_Connection=yes;'

# Parameterized insert; placeholder order must match insert_columns below
sql_command = "INSERT INTO index_monthly (date, [open], high, low, [close], symbol) VALUES (?, ?, ?, ?, ?, ?)"

# Select columns explicitly so the values line up with the placeholders even
# if df_1's column order changes upstream
insert_columns = ['date', 'open', 'high', 'low', 'close', 'symbol']
insert_rows = list(df_1[insert_columns].itertuples(index=False, name=None))

# NOTE(review): re-running this cell inserts the same rows again; nothing here
# guards against duplicates

# conn starts as None so the finally block can tell whether connect() succeeded
conn = None
try:
    # Establish the connection
    conn = pyodbc.connect(connection_string)
    print("Connection successful!")
    cursor = conn.cursor()

    # Insert one row per DataFrame record. The loop variable is deliberately
    # not called `index`, which would overwrite the notebook-level ticker
    # variable and break a re-run of the fetch cells.
    for record in insert_rows:
        cursor.execute(sql_command, record)

    # Commit the transaction
    conn.commit()

    print("Records Inserted!")

except Exception as e:
    # Best-effort: report the failure and let the notebook continue
    print(f"Error: {e}")

finally:
    # Close only if a connection was actually opened; a failed connect()
    # previously led to a NameError (or closed a stale connection) here
    if conn is not None:
        conn.close()

Connection successful!
Records Inserted!
