In [1]:
# Import the necessary libraries (data manipulation, visualization, financial analysis)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import talib as ta
import pynance as pn

# Notebook-wide plotting defaults: seaborn's white grid style and a wide 14x6 figure.
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

In [2]:
# Load META daily price data from the yfinance_data directory.
# The symbol is defined once so the file path and the Ticker column cannot drift apart.
ticker = 'META'
df = pd.read_csv(f"../yfinance_data/{ticker}.csv")
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')
# Leave the index unnamed (set_index would otherwise label it 'Date').
df.index.name = None

# Cast the price/volume columns to float in a single step.
numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
df = df.astype({col: float for col in numeric_cols})

# Record the ticker on the first row only; every other row holds None.
# Assigning in place keeps every CSV column intact on that row and avoids
# concatenating against a frame whose Ticker column is entirely NA.
df['Ticker'] = None
df.iloc[0, df.columns.get_loc('Ticker')] = ticker  # ticker in first row

print(df.head())

                 Open       High        Low      Close       Volume Ticker
2012-05-18  41.792356  44.724282  37.767172  37.995762  573576400.0   META
2012-05-21  36.306177  36.435382  32.797807  33.821495  168192700.0   None
2012-05-22  32.410203  33.384198  30.750434  30.810066  101786600.0   None
2012-05-23  31.177799  32.300874  31.167860  31.803938   73600000.0   None
2012-05-24  32.748117  33.006523  31.575347  32.827625   50237200.0   None


In [3]:
# Collapse MultiIndex column headers to their first level, if present.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)
print("Cleaned columns:", df.columns.tolist())

# Fill the ticker symbol on every row.
df["Ticker"] = ticker

# Keep only rows whose index label is not the literal string 'Ticker', then
# give the result a datetime index sorted in ascending order.
non_label_rows = df.loc[df.index != 'Ticker']
df_numeric_index = (
    non_label_rows
    .set_axis(pd.to_datetime(non_label_rows.index), axis=0)
    .sort_index()
)

print(df_numeric_index.head())

Cleaned columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Ticker']
                 Open       High        Low      Close       Volume Ticker
2012-05-18  41.792356  44.724282  37.767172  37.995762  573576400.0   META
2012-05-21  36.306177  36.435382  32.797807  33.821495  168192700.0   META
2012-05-22  32.410203  33.384198  30.750434  30.810066  101786600.0   META
2012-05-23  31.177799  32.300874  31.167860  31.803938   73600000.0   META
2012-05-24  32.748117  33.006523  31.575347  32.827625   50237200.0   META


In [4]:
# Basic exploratory data analysis: structure, summary statistics, missing values.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.describe())
print(df.isnull().sum())
# Drop rows containing any missing value, modifying df in place.
df.dropna(inplace=True)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2923 entries, 2012-05-18 to 2023-12-29
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    2923 non-null   float64
 1   High    2923 non-null   float64
 2   Low     2923 non-null   float64
 3   Close   2923 non-null   float64
 4   Volume  2923 non-null   float64
 5   Ticker  2923 non-null   object 
dtypes: float64(5), object(1)
memory usage: 159.9+ KB
None
              Open         High          Low        Close        Volume
count  2923.000000  2923.000000  2923.000000  2923.000000  2.923000e+03
mean    156.653626   158.690484   154.685098   156.727440  3.060615e+07
std      90.308834    91.470736    89.222981    90.368766  2.637157e+07
min      17.969224    18.158061    17.442471    17.621368  5.467500e+06
25%      80.275126    80.970839    79.663892    80.275124  1.633730e+07
50%     152.698669   154.636700   150.730785   152.579376  2.295390e+07
75%     202.96378