In [1]:
import os
import pandas as pd
from decimal import Decimal
import numpy as np
from datetime import datetime, timedelta
from dotenv import load_dotenv
import yfinance as yf


import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
import pytz  # Make sure to import pytz for timezone handling
import seaborn as sns


import requests
import csv
import json

import warnings

In [2]:
# # yahoo finance free version

# today_date = datetime.today()
# window_days = 365*10 # This is 10 years in days window

# # Define the list of stock symbols
# stocks_list = [
#     'SPY'
#     ,'CNI'
#     ,'PM'
#     # ,'COST'
#     # ,'WMT'

#     ]

# # Fetch the data
# data = yf.download(
#     stocks_list
#     ,start=today_date - timedelta(days=window_days)
#     ,end=today_date)



# # The data contains multi-level columns, we'll focus on 'Adj Close' for adjusted closing prices
# adj_close = data['Adj Close']

# # Perform standardization using StandardScaler (needs `from sklearn.preprocessing import StandardScaler`, which is not imported above)
# scaler = StandardScaler()
# standardized_data = scaler.fit_transform(adj_close)

# # Convert the normalized data back to a DataFrame
# standardized_data = pd.DataFrame(standardized_data, index=adj_close.index, columns=adj_close.columns)


# # Plotting the adjusted closing prices of the stocks
# plt.figure(figsize=(14, 7))

# for stock in stocks_list:
#     plt.plot(standardized_data[stock], label=stock)

# plt.title('Stock Prices Over Time')
# plt.xlabel('Date')
# plt.ylabel('Adjusted Closing Price')
# plt.legend()
# plt.grid(True)
# plt.show()


In [3]:
# Pull variables from a local .env file into the process environment, then
# read the Alpha Vantage key from it (None when the variable is not set).
load_dotenv()

API_KEY = os.environ.get("alpha_vantage_api_key")

In [4]:
# Additional session settings
# Lift pandas' display truncation so every row and column is rendered
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

# Silence all warnings for the rest of the session
warnings.filterwarnings("ignore")

# Opt in to pandas copy-on-write semantics
pd.set_option('mode.copy_on_write', True)

In [5]:
# ---- Run parameters ----

# Alpha Vantage key taken from the environment.
# The FREE TIER API rate limit is 25 requests per day.
alpha_vantage_api_key = API_KEY

# Tickers to analyse.
# Previously explored and currently disabled: XOM, PM, CNI, VZ, JNJ, WM, CB, TRV, BRK.B
ticker_symbols = [
    # energy
    'XOM',
    'OXY',
    'CVX',
]

# Look-back window expressed in days (20 years of 365 days).
window_days = 20 * 365

# NOTE: the names read backwards -- `start_date` is the most recent bound
# (today) and `end_date` is the oldest bound (today minus the window).
start_date = datetime.today()
end_date = start_date - timedelta(days=window_days)

# Accumulators filled by the per-symbol download loop.
stock_return_consolidate_df, stock_daily_ts_consolidate_df = pd.DataFrame(), pd.DataFrame()

In [7]:
# ---------------------------------------------------------------------------
# Alpha Vantage download helpers
# ---------------------------------------------------------------------------
_ALPHA_VANTAGE_URL = 'https://www.alphavantage.co/query'
_ALPHA_VANTAGE_TIMEOUT_SECONDS = 30


def _query_alpha_vantage(payload_key, **query):
    """Call the Alpha Vantage query endpoint and return ``body[payload_key]``.

    Parameters
    ----------
    payload_key : str
        Top-level key of the JSON response that holds the requested data.
    **query
        Query-string parameters (``function``, ``symbol``, ...). The API key
        is appended automatically.

    Raises
    ------
    RuntimeError
        When the HTTP status is not successful or the response does not
        contain ``payload_key``. Failing here prevents the loop from silently
        reusing the data frames of the previous symbol.
    """
    response = requests.get(
        _ALPHA_VANTAGE_URL,
        params={**query, 'apikey': alpha_vantage_api_key},
        timeout=_ALPHA_VANTAGE_TIMEOUT_SECONDS,
    )
    # Error messages are built from `query` only, so the API key is never echoed.
    if not response.ok:
        raise RuntimeError(
            f'Alpha Vantage request {query} failed with HTTP status {response.status_code}'
        )

    body = response.json()
    if payload_key not in body:
        raise RuntimeError(
            f"Alpha Vantage response for {query} has no '{payload_key}' entry "
            f'(top-level keys: {sorted(body)}); this may indicate a rate-limit or error message'
        )
    return body[payload_key]


def _load_split_records(symbol):
    """Return the split history of ``symbol``.

    Columns: ``split_factor`` (numeric, unparseable values become NaN) and
    ``effective_date`` (datetime). The frame is empty when no split is reported.
    """
    records = _query_alpha_vantage('data', function='SPLITS', symbol=symbol)
    if len(records) == 0:
        return pd.DataFrame({
            'split_factor': pd.Series(dtype='float64'),
            'effective_date': pd.Series(dtype='datetime64[ns]'),
        })

    splits = pd.DataFrame(records)
    splits['split_factor'] = pd.to_numeric(splits['split_factor'], errors='coerce')
    splits['effective_date'] = pd.to_datetime(splits['effective_date'])
    return splits


def _load_daily_close_prices(symbol):
    """Return daily close prices of ``symbol`` as a ``stock_price`` column.

    The index is a DatetimeIndex sorted newest-first, so ``iloc[0]`` is the
    most recent quote and ``iloc[-1]`` the oldest one regardless of the order
    in which the API returned the rows.
    """
    series_by_date = _query_alpha_vantage(
        'Time Series (Daily)',
        function='TIME_SERIES_DAILY',
        symbol=symbol,
        outputsize='full',
    )
    # The payload maps date -> {field: value}; transpose so that dates are rows.
    prices = (
        pd.DataFrame(series_by_date)
        .transpose()[['4. close']]
        .rename(columns={'4. close': 'stock_price'})
    )
    prices['stock_price'] = pd.to_numeric(prices['stock_price']).round(2)
    prices.index = pd.to_datetime(prices.index)
    return prices.sort_index(ascending=False)


def _apply_split_adjustment(daily_prices, split_records):
    """Divide every price quoted before a split by that split's factor.

    A split is applied only when its effective date is one of the trading
    days present in ``daily_prices``. Returns an adjusted copy.
    """
    adjusted = daily_prices.copy()
    trading_dates = adjusted.index.date
    known_dates = set(trading_dates)

    for effective_ts, factor in zip(split_records['effective_date'], split_records['split_factor']):
        effective_date = effective_ts.date()
        if effective_date not in known_dates:
            continue
        adjusted.loc[trading_dates < effective_date, 'stock_price'] /= factor
    return adjusted


def _load_dividends(symbol):
    """Return the dividend history of ``symbol``.

    Columns: ``{symbol}_div_date`` (ex-dividend date as returned by the API)
    and ``{symbol}_div_amount`` (float rounded to 2 decimals). The frame is
    empty when no dividend is reported.
    """
    date_col = f'{symbol}_div_date'
    amount_col = f'{symbol}_div_amount'

    records = _query_alpha_vantage('data', function='DIVIDENDS', symbol=symbol)
    if len(records) == 0:
        return pd.DataFrame({
            date_col: pd.Series(dtype='object'),
            amount_col: pd.Series(dtype='float64'),
        })

    dividends = pd.DataFrame(records)[['ex_dividend_date', 'amount']].rename(
        columns={'ex_dividend_date': date_col, 'amount': amount_col}
    )
    dividends[amount_col] = pd.to_numeric(dividends[amount_col]).round(2)
    return dividends


def _load_wti_daily_prices():
    """Return daily WTI crude oil prices with columns ``Date`` and ``WTI``.

    The interval can be daily, weekly or monthly; daily is requested here.
    Days without a valid quote are reported as '.' by the API. They are
    converted to NaN and filled by linear interpolation between the
    neighbouring quotes (i.e. the average of the previous and next price for
    an isolated gap); gaps at either end take the nearest valid price.
    """
    records = _query_alpha_vantage('data', function='WTI', interval='daily')
    wti = pd.DataFrame(records).rename(columns={'value': 'WTI', 'date': 'Date'})
    wti['WTI'] = (
        pd.to_numeric(wti['WTI'], errors='coerce')
        .interpolate(method='linear', limit_direction='both')
    )
    wti['Date'] = pd.to_datetime(wti['Date'])
    return wti


# The WTI series does not depend on the ticker, so it is downloaded once
# instead of once per symbol (the free tier allows only 25 requests per day).
WTI_daily_price_df = _load_wti_daily_prices()

for symbol in ticker_symbols:

    # Split history, then split-adjusted daily close prices
    stock_split_record_df = _load_split_records(symbol)
    Daily_stock_df = _apply_split_adjustment(
        _load_daily_close_prices(symbol),
        stock_split_record_df,
    )

    # Dividend history
    # NOTE(review): dividend amounts are used as returned by the API and are
    # not split-adjusted here -- confirm this matches the intended return formula.
    stock_dividend_df = _load_dividends(symbol)

    # Time window filter (end_date is the oldest bound, start_date the newest)
    trading_dates = Daily_stock_df.index.date
    Daily_stock_df_filter_timewindow = Daily_stock_df[
        (trading_dates >= end_date.date())
        & (trading_dates <= start_date.date())
    ]
    if Daily_stock_df_filter_timewindow.empty:
        raise RuntimeError(f'No daily prices for {symbol} inside the requested time window')

    ex_dividend_dates = pd.to_datetime(stock_dividend_df[f'{symbol}_div_date'])
    stock_dividend_df_filter_timewindow = stock_dividend_df[
        (ex_dividend_dates >= pd.to_datetime(end_date))
        & (ex_dividend_dates <= pd.to_datetime(start_date))
    ]

    # Return including dividends =
    #   (current_price - initial_price + total_dividend_during_period) / initial_price
    # Rows are sorted newest-first, so position 0 is current and -1 is initial.
    current_price = Daily_stock_df_filter_timewindow['stock_price'].iloc[0]
    initial_price = Daily_stock_df_filter_timewindow['stock_price'].iloc[-1]
    total_dividends = stock_dividend_df_filter_timewindow[f'{symbol}_div_amount'].sum()
    stock_return = round((current_price - initial_price + total_dividends) / initial_price, 4)

    # Store the results in the consolidated data frames
    stock_return_consolidate_df.loc[0, f'{symbol}_{window_days/365}_yrs_return'] = stock_return
    stock_daily_ts_consolidate_df[f'{symbol}'] = Daily_stock_df_filter_timewindow['stock_price']


####### WTI merge step ###########
# Left-join the WTI crude oil price onto the consolidated stock series by
# matching the stock frame's index against the WTI 'Date' column, then make
# 'Date' the index again (set_index also removes it from the columns).
stock_daily_ts_consolidate_df = (
    stock_daily_ts_consolidate_df
    .merge(
        WTI_daily_price_df,
        left_on=stock_daily_ts_consolidate_df.index,
        right_on='Date',
        how='left',
    )
    .set_index('Date')
)
####### WTI merge step ###########


# Express every column relative to its value in the last row of the frame
stock_daily_ts_consolidate_normalized_df = stock_daily_ts_consolidate_df.div(
    stock_daily_ts_consolidate_df.iloc[-1]
)

In [8]:
# One row per "<symbol>_<years>_yrs_return" entry, ranked from best to worst
stock_return_consolidate_df.T.pipe(
    lambda returns_by_symbol: returns_by_symbol.sort_values(
        by=returns_by_symbol.columns[0],
        ascending=False,
    )
)

Unnamed: 0,0
CVX_20.0_yrs_return,3.7328
OXY_20.0_yrs_return,2.7533
XOM_20.0_yrs_return,2.7037


In [9]:
# Express every column relative to its value in the last row of the frame
# (presumably the oldest date in the window -- confirm the row order).
stock_daily_ts_consolidate_normalized_df = stock_daily_ts_consolidate_df.div(
    stock_daily_ts_consolidate_df.iloc[-1]
)

# Build the Plotly figure with one line trace per column
fig = go.Figure(
    data=[
        go.Scatter(
            x=normalized_series.index,
            y=normalized_series,
            mode='lines',
            name=series_name,
        )
        for series_name, normalized_series in stock_daily_ts_consolidate_normalized_df.items()
    ]
)

# Titles, axis labels and theme
layout_options = dict(
    title=f'Normalized Stock Prices in {window_days/365} years',
    xaxis_title='Date',
    yaxis_title='Normalized Price',
    legend_title='Stock',
    template='plotly_white',
)
fig.update_layout(**layout_options)

# Render the chart
fig.show()

In [10]:
# Beer sales volume analysis
# Downloads the TTB statistics file and keeps the monthly category totals for
# production, removals and end-of-month stocks.

url = 'https://www.ttb.gov/media/79096/download?inline'
r = requests.get(url, timeout=30)
if not r.ok:
    raise RuntimeError(f'TTB download failed with HTTP status {r.status_code}: {url}')
data = r.json()

# Fail here with a clear message instead of a NameError (or stale data from a
# previous run) in the plotting code when the expected section is missing.
if 'Monthly Data' not in data:
    raise RuntimeError(
        f"TTB response has no 'Monthly Data' section (top-level keys: {sorted(data)})"
    )

US_beer_df = pd.DataFrame(data['Monthly Data'])

selected_cols = [
    'CY_Month_Number',
    'Year',
    'Statistical_Group',
    # 'Count_IMs',  # counts of industry members
    'Value',
]

filter_1 = US_beer_df['Statistical_Group'].isin(['1-Production', '2-Removals', '4-Stocks On Hand End-of-Month'])
filter_2 = US_beer_df['Statistical_Category'].isin(['0-Category Total'])

US_beer_df_filtered = US_beer_df.loc[filter_1 & filter_2, selected_cols].copy()

# Convert Year and Month into a single datetime column (first day of the month) for better plotting
US_beer_df_filtered['Year_Month'] = pd.to_datetime(
    US_beer_df_filtered['Year'].astype(str)
    + '-'
    + US_beer_df_filtered['CY_Month_Number'].astype(str)
    + '-01'
)



# Line chart of the monthly volume, one coloured line per statistical group
axis_labels = {
    'Year_Month': 'Year-Month',
    'Value': 'Volume',
}

fig = px.line(
    data_frame=US_beer_df_filtered,
    x='Year_Month',
    y='Value',
    color='Statistical_Group',
    title='Line Chart of US Beers by Statistical Group',
    labels=axis_labels,
)

fig.show()