# Accesing and merging multiple CSV files

In [3]:
import glob
import os
import pandas as pd
import plotly.express as px

# Path to CSV files
csv_directory = '../data/yfinance_data/'

# Verify the directory and files
print("Directory contents:", os.listdir(csv_directory))
csv_files = glob.glob(os.path.join(csv_directory, '*.csv'))
print("CSV files found:", csv_files)

# Load and concatenate all CSV files into one DataFrame
data_frames = [pd.read_csv(file) for file in csv_files]
if not data_frames:
    raise ValueError("No data frames to concatenate.")

combined_df = pd.concat(data_frames, ignore_index=True)

# Display the first few rows of the combined DataFrame
print(combined_df.head())

Directory contents: ['AAPL_historical_data.csv', 'NVDA_historical_data.csv', 'MSFT_historical_data.csv', 'TSLA_historical_data.csv', 'GOOG_historical_data.csv', 'AMZN_historical_data.csv', 'META_historical_data.csv']
CSV files found: ['../data/yfinance_data/AAPL_historical_data.csv', '../data/yfinance_data/NVDA_historical_data.csv', '../data/yfinance_data/MSFT_historical_data.csv', '../data/yfinance_data/TSLA_historical_data.csv', '../data/yfinance_data/GOOG_historical_data.csv', '../data/yfinance_data/AMZN_historical_data.csv', '../data/yfinance_data/META_historical_data.csv']
         Date      Open      High       Low     Close  Adj Close     Volume  \
0  1980-12-12  0.128348  0.128906  0.128348  0.128348   0.098943  469033600   
1  1980-12-15  0.122210  0.122210  0.121652  0.121652   0.093781  175884800   
2  1980-12-16  0.113281  0.113281  0.112723  0.112723   0.086898  105728000   
3  1980-12-17  0.115513  0.116071  0.115513  0.115513   0.089049   86441600   
4  1980-12-18  0.118

# Preprocessing the Combined data

In [4]:
# Load and concatenate all CSV files into one DataFrame
data_frames = [pd.read_csv(file) for file in csv_files]
if not data_frames:
    raise ValueError("No data frames to concatenate.")

combined_df = pd.concat(data_frames, ignore_index=True)

# Display the column names 
print("Columns in combined_df:", combined_df.columns)

# Convert 'Date' column to datetime format
if 'Date' in combined_df.columns:
    combined_df['Date'] = pd.to_datetime(combined_df['Date'], errors='coerce')  # Use errors='coerce' to handle any invalid date formats
    combined_df.set_index('Date', inplace=True)
else:
    print("Column 'Date' not found in the DataFrame.")

# Display the DataFrame structure to confirm it is ready for TA-Lib
print(combined_df.head())

Columns in combined_df: Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       'Dividends', 'Stock Splits'],
      dtype='object')
                Open      High       Low     Close  Adj Close     Volume  \
Date                                                                       
1980-12-12  0.128348  0.128906  0.128348  0.128348   0.098943  469033600   
1980-12-15  0.122210  0.122210  0.121652  0.121652   0.093781  175884800   
1980-12-16  0.113281  0.113281  0.112723  0.112723   0.086898  105728000   
1980-12-17  0.115513  0.116071  0.115513  0.115513   0.089049   86441600   
1980-12-18  0.118862  0.119420  0.118862  0.118862   0.091630   73449600   

            Dividends  Stock Splits  
Date                                 
1980-12-12        0.0           0.0  
1980-12-15        0.0           0.0  
1980-12-16        0.0           0.0  
1980-12-17        0.0           0.0  
1980-12-18        0.0           0.0  


# Apply Analysis Indicators with TA-Lib

In [1]:
import talib as ta
combined_df['SMA_20'] = ta.SMA(combined_df['Close'], timeperiod=20)
combined_df['RSI_14'] = ta.RSI(combined_df['Close'], timeperiod=14)
combined_df['MACD'], combined_df['MACD_signal'], combined_df['MACD_hist'] = ta.MACD(combined_df['Close'])

# Display the DataFrame with added indicators
print(combined_df.head())

ModuleNotFoundError: No module named 'talib'

# Visualiazation

In [7]:
# Plot stock price with SMA
fig = px.line(combined_df, x=combined_df.index, y=['Close', 'SMA_20'], title='Stock Price with 20-Day SMA')
fig.show()

# Plot RSI
fig = px.line(combined_df, x=combined_df.index, y='RSI_14', title='Relative Strength Index (RSI)')
fig.add_hline(y=70, line_dash="dash", line_color="red")
fig.add_hline(y=30, line_dash="dash", line_color="green")
fig.show()

# Plot MACD
fig = px.line(combined_df, x=combined_df.index, y=['MACD', 'MACD_signal'], title='MACD')
fig.show()

[31mERROR: TA_Lib-0.4.32-cp311-cp311-win_amd64.whl is not a supported wheel on this platform.[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
