# <u> DATA RETREIVAL</u>

### <u> Requisits </u>

In [2]:
import os
import numpy as np 
import pandas as pd

### <u> Fetching specific tickers </u>

In [6]:
def fetch_tickers(tickers, index, start_date, end_date):
    
    #Initializing DataFrame:
    date_range = pd.date_range(start=start_date, end=end_date)
    data = pd.DataFrame(date_range, columns=['Date'])
    data.set_index('Date', inplace=True)

    for ticker in tickers:
        
        file_path = os.path.join('stock_market_data', index, 'csv', f'{ticker}.csv')
        ticker_data = pd.read_csv(file_path, usecols=['Date', 'Adjusted Close'])
        ticker_data['Date'] = pd.to_datetime(ticker_data['Date'], dayfirst=True, errors='coerce')
        
        # Drop rows with invalid dates (if any)
        ticker_data.dropna(subset=['Date'], inplace=True)
        
        # Set 'Date' as the index
        ticker_data.set_index('Date', inplace=True)
        
        # Reindex to match the desired date range and join with the main DataFrame
        data = data.join(ticker_data.rename(columns={'Adjusted Close': ticker}), how='left')

    return data.reset_index()

In [7]:
result = fetch_tickers(['MSFT', 'AAPL'], 'nasdaq', '2020-01-01', '2020-12-31')
result

Unnamed: 0,Date,MSFT,AAPL
0,2020-01-01,,
1,2020-01-02,158.205765,73.561546
2,2020-01-03,156.235825,72.846375
3,2020-01-04,,
4,2020-01-05,,
...,...,...,...
361,2020-12-27,,
362,2020-12-28,223.925995,135.068573
363,2020-12-29,223.119720,133.270157
364,2020-12-30,220.661072,132.133820


### <u> Fetching all tickers </u>

In [24]:
def fetch_all(index, start_date, end_date):
    # Construct the directory path
    directory_path = os.path.join('stock_market_data', index, 'csv')
    
    # Get all .csv files in the directory
    csv_files = [f for f in os.listdir(directory_path) if f.endswith('.csv')]
    
    # Extract ticker names from file names (remove the '.csv' extension)
    tickers = [os.path.splitext(f)[0] for f in csv_files]
    
    # Create an empty DataFrame with the Date column containing all dates in the range
    date_range = pd.date_range(start=start_date, end=end_date)
    data = pd.DataFrame(date_range, columns=['Date'])
    data.set_index('Date', inplace=True)

    for ticker in tickers:
        # Construct the full file path
        file_path = os.path.join(directory_path, f'{ticker}.csv')
        
        # Read the CSV file and extract the required columns
        ticker_data = pd.read_csv(file_path, usecols=['Date', 'Adjusted Close'])
        
        # Convert 'Date' column to datetime, specifying dayfirst=True for correct parsing
        ticker_data['Date'] = pd.to_datetime(ticker_data['Date'], dayfirst=True, errors='coerce')
        
        # Drop rows with invalid dates (if any)
        ticker_data.dropna(subset=['Date'], inplace=True)
        
        # Set 'Date' as the index
        ticker_data.set_index('Date', inplace=True)
        
        # Reindex to match the desired date range and join with the main DataFrame
        data = data.join(ticker_data.rename(columns={'Adjusted Close': ticker}), how='left')

    return data.reset_index()

# Example usage
result = fetch_all('nasdaq', '2022-01-01', '2022-12-31')
print(result)


          Date       CSCO  BIOS  CSBK       SBGI       UFCS       AIRT  ISSC  \
0   2022-01-01        NaN   NaN   NaN        NaN        NaN        NaN   NaN   
1   2022-01-02        NaN   NaN   NaN        NaN        NaN        NaN   NaN   
2   2022-01-03  61.237827   NaN   NaN  26.920000  23.709999  25.010000  6.57   
3   2022-01-04  59.735897   NaN   NaN  26.799999  23.980000  25.150000  6.47   
4   2022-01-05  58.789871   NaN   NaN  25.980000  23.680000  25.290001  6.59   
..         ...        ...   ...   ...        ...        ...        ...   ...   
362 2022-12-27        NaN   NaN   NaN        NaN        NaN        NaN   NaN   
363 2022-12-28        NaN   NaN   NaN        NaN        NaN        NaN   NaN   
364 2022-12-29        NaN   NaN   NaN        NaN        NaN        NaN   NaN   
365 2022-12-30        NaN   NaN   NaN        NaN        NaN        NaN   NaN   
366 2022-12-31        NaN   NaN   NaN        NaN        NaN        NaN   NaN   

     KNDI   BCMXY  ...        BCS      