In [1]:
import yfinance as yf
import pandas as pd
import pandas_ta as ta
import numpy as np

In [11]:
def download_historic_data(ticker):
    """It saves the data from 2006 to current date
    in csv file in current directory"""
    
    from datetime import datetime
    current_date = datetime.now()
    current_date = current_date.strftime("%Y-%m-%d")
    
    data = yf.download(ticker, start="2000-01-01", end="2024-12-30")
    data.reset_index(inplace=True)

    data = data.rename(columns={
        'Date': 'date',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume'
    })
    data['date']=pd.to_datetime(data['date'])
    

    csv_file = f'raw data/{ticker}_data.csv'
    data.to_csv(csv_file, index=False)  # index=False to exclude the default pandas index
    del_sec_row(csv_file)
    print(f"TCS data saved to {csv_file}")
   

def del_sec_row(file_path):
    import csv
    
    # Read and write in one go, skipping the second row
    with open(file_path, mode="r", newline="", encoding="utf-8") as file:
        rows = list(csv.reader(file))  # Load all rows
    
    # Remove the second row
    if len(rows) > 1:
        rows.pop(1)
    
    # Save the updated rows back to the file
    with open(file_path, mode="w", newline="", encoding="utf-8") as file:
        csv.writer(file).writerows(rows)
    
    
    

In [15]:
download_historic_data("MRF.NS")

[*********************100%***********************]  1 of 1 completed

TCS data saved to raw data/MRF.NS_data.csv





In [17]:
def technical_generator(ticker):
    file_path=f'raw data/{ticker}_data.csv'
    data=pd.read_csv(file_path)
    df=data


    data['date'] = pd.to_datetime(data['date'])
    data['year'] = data['date'].dt.year
    data['month'] = data['date'].dt.month
    data['day'] = data['date'].dt.day
    data['dayofweek'] = data['date'].dt.dayofweek
    data['quarter'] = data['date'].dt.quarter
    data['dayofyear'] = data['date'].dt.dayofyear


    # Add Simple Moving Averages (SMA)
    data['SMA_5'] = ta.sma(data['close'], length=5)
    data['SMA_8'] = ta.sma(data['close'], length=8)
    df['SMA_13'] = ta.sma(df['close'], length=13)
    df['SMA_21'] = ta.sma(df['close'], length=21)
    df['SMA_50'] = ta.sma(df['close'], length=50)
   
    
    # Add Exponential Moving Averages (EMA)
    data['EMA_5'] = ta.ema(data['close'], length=5)
    data['EMA_8'] = ta.ema(data['close'], length=8)
    data['EMA_13'] = ta.ema(data['close'], length=13)
    df['EMA_21'] = ta.ema(df['close'], length=21)
    df['EMA_50'] = ta.ema(df['close'], length=50)
 
    
    # Add Stochastic Oscillator (%K and %D)
    stoch = ta.stoch(df['high'], df['low'], df['close'], k=14, d=3)
    df['Stoch_%K'] = stoch['STOCHk_14_3_3']
    df['Stoch_%D'] = stoch['STOCHd_14_3_3']
    
    # Add Average Directional Index (ADX)
    df['ADX'] = ta.adx(df['high'], df['low'], df['close'], length=14)['ADX_14']

    # Add Lag Features
    df['Close_lag1'] = df['close'].shift(1)
    data['close_lag2'] = data['close'].shift(2)

    
    # Add Daily Returns and Log Returns
    df['Daily_Return'] = df['close'].pct_change()
    df['Log_Return'] = np.log(df['close'] / df['close'].shift(1))



    
    # date is as index    
    data.set_index('date', inplace=True)
    

    # Assuming your data is loaded in the DataFrame 'data'
    # Ensure the date column is in datetime format and set it as index
    
    
    # Assuming 'data' is your DataFrame and 'date' is the index
    # Ensure the 'close' column is properly formatted as numeric
    data['close'] = pd.to_numeric(data['close'], errors='coerce')
    
    # Fill any NaN values that may appear (using .ffill)
    data['close'] = data['close'].ffill()
    
    
    
    # Calculate MACD using pandas_ta
    macd_result = ta.macd(data['close'], fast=12, slow=26, signal=9)
    
    # Add MACD components to the data
    data['MACD'] = macd_result['MACD_12_26_9']
    data['MACD_signal'] = macd_result['MACDs_12_26_9']
    data['MACD_hist'] = macd_result['MACDh_12_26_9']
    
    # 2. Calculate RSI (Relative Strength Index)
    data['RSI'] = ta.rsi(data['close'], length=14)
    
    # Step 2: Recalculate VWAP after setting 'date' as the index
    data['VWAP'] = ta.vwap(high=data['high'], low=data['low'], close=data['close'], volume=data['volume'])
    
    # Calculate Bollinger Bands (returns a DataFrame, not individual series)
    bbands = ta.bbands(data['close'], length=20, std=2)
    
    
    # Extract the individual bands from the result
    data['BB_upper'] = bbands['BBL_20_2.0']
    data['BB_middle'] = bbands['BBM_20_2.0']
    data['BB_lower'] = bbands['BBU_20_2.0']

    
    # 4. Calculate On-Balance Volume (OBV)
    data['OBV'] = ta.obv(data['close'], data['volume'])

    
    # Default parameters for AF (Acceleration Factor) are 0.02 and maximum AF is 0.2
    # Calculate Parabolic SAR
    # pandas_ta.psar returns multiple columns: `PSARl_0.02_0.2`, `PSARs_0.02_0.2`, and `PSAR_0.02_0.2`
    psar = ta.psar(data['high'], data['low'], data['close'], step=0.02, max_step=0.2)

    
    # Combine PSARl and PSARs into a single column
    data['PSAR'] = psar['PSARl_0.02_0.2'].combine_first(psar['PSARs_0.02_0.2'])


    # 5. Calculate ATR (Average True Range)
    data['ATR'] = ta.atr(data['high'], data['low'], data['close'], length=14)



    


    # Calculate Ichimoku Cloud
    ichimoku = ta.ichimoku(data['high'], data['low'], data['close'], window1=9, window2=26, window3=52)
    # Extract Ichimoku components
    data['Tenkan-sen'] = ichimoku[0]['ITS_9']  # Conversion Line
    data['Kijun-sen'] = ichimoku[0]['IKS_26']  # Base Line
    data['Chikou Span'] = ichimoku[0]['ICS_26']  # Lagging Span

        

    # Calculate CCI Commodity Channel Index (CCI) 
    data['CCI'] = ta.cci(data['high'], data['low'], data['close'], length=20)



    # Reset index to make 'date' a regular column
    data = data.reset_index()


    # data=data_reset.dropna() 
    
    # Save the DataFrame to a CSV file
    new_file=f'technical indicators/{ticker}_indicators.csv'
    data.to_csv(new_file, index=False)
    print(f"TCS data saved to {new_file}")
    return data

        

In [19]:
data=technical_generator("TCS.NS")

TCS data saved to technical indicators/TCS.NS_indicators.csv


In [11]:
import os
import pandas as pd
import numpy as np
import yfinance as yf
import pandas_ta as ta

def get_directory(base_dir, data_type):
    """Returns the correct directory path for storing data."""
    if data_type not in ['train', 'test']:
        raise ValueError("Invalid data_type. Choose 'train' or 'test'")
    
    raw_data_dir = os.path.join(base_dir, data_type, 'raw_data')
    tech_data_dir = os.path.join(base_dir, data_type, 'technical_indicators')
    os.makedirs(raw_data_dir, exist_ok=True)
    os.makedirs(tech_data_dir, exist_ok=True)
    
    return raw_data_dir, tech_data_dir

def download_historic_data(ticker, data_type='train', base_dir='data'):
    """Downloads historical stock data and saves it to the appropriate directory."""
    raw_data_dir, _ = get_directory(base_dir, data_type)
    csv_file = os.path.join(raw_data_dir, f'{ticker}_data.csv')
    
    data = yf.download(ticker, start="2023-12-01", end="2025-02-21")
    data.reset_index(inplace=True)
    
    data = data.rename(columns={'Date': 'date', 'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume'})
    data['date'] = pd.to_datetime(data['date'])
    
    data.to_csv(csv_file, index=False)
    del_second_row(csv_file)
    print(f"Data saved to {csv_file}")

def del_second_row(file_path):
    """Removes the second row from a CSV file."""
    import csv
    with open(file_path, "r", newline="", encoding="utf-8") as file:
        rows = list(csv.reader(file))
    if len(rows) > 1:
        rows.pop(1)
    with open(file_path, "w", newline="", encoding="utf-8") as file:
        csv.writer(file).writerows(rows)

def generate_technical_indicators(ticker, data_type='train', base_dir='data'):
    """Loads raw stock data, computes technical indicators, and saves it."""
    raw_data_dir, tech_data_dir = get_directory(base_dir, data_type)
    
    file_path = os.path.join(raw_data_dir, f'{ticker}_data.csv')
    new_file = os.path.join(tech_data_dir, f'{ticker}_indicators.csv')

    data = pd.read_csv(file_path)

    data['date'] = pd.to_datetime(data['date'])
    data['year'] = data['date'].dt.year
    data['month'] = data['date'].dt.month
    data['day'] = data['date'].dt.day
    data['dayofweek'] = data['date'].dt.dayofweek
    data['quarter'] = data['date'].dt.quarter
    data['dayofyear'] = data['date'].dt.dayofyear

    # Add Simple Moving Averages (SMA)
    data['SMA_5'] = ta.sma(data['close'], length=5)
    data['SMA_8'] = ta.sma(data['close'], length=8)
    data['SMA_13'] = ta.sma(data['close'], length=13)
    data['SMA_21'] = ta.sma(data['close'], length=21)
    data['SMA_50'] = ta.sma(data['close'], length=50)
   
    # Add Exponential Moving Averages (EMA)
    data['EMA_5'] = ta.ema(data['close'], length=5)
    data['EMA_8'] = ta.ema(data['close'], length=8)
    data['EMA_13'] = ta.ema(data['close'], length=13)
    data['EMA_21'] = ta.ema(data['close'], length=21)
    data['EMA_50'] = ta.ema(data['close'], length=50)
 
    # Add Stochastic Oscillator (%K and %D)
    stoch = ta.stoch(data['high'], data['low'], data['close'], k=14, d=3)
    data['Stoch_%K'] = stoch['STOCHk_14_3_3']
    data['Stoch_%D'] = stoch['STOCHd_14_3_3']
    
    # Add Average Directional Index (ADX)
    data['ADX'] = ta.adx(data['high'], data['low'], data['close'], length=14)['ADX_14']

    # Add Lag Features
    data['Close_lag1'] = data['close'].shift(1)
    data['close_lag2'] = data['close'].shift(2)
    
    # Add Daily Returns
    data['Daily_Return'] = data['close'].pct_change()

    # date is as index    
    data.set_index('date', inplace=True)
     
    data['close'] = pd.to_numeric(data['close'], errors='coerce')  # Convert to float
    data['close'].fillna(method='ffill', inplace=True)  # Fill missing values

    
    # Calculate MACD using pandas_ta
    macd_result = ta.macd(data['close'], fast=12, slow=26, signal=9)
    
    # Add MACD components to the data
    data['MACD'] = macd_result['MACD_12_26_9']
    data['MACD_signal'] = macd_result['MACDs_12_26_9']
    data['MACD_hist'] = macd_result['MACDh_12_26_9']
    
    # 2. Calculate RSI (Relative Strength Index)
    data['RSI'] = ta.rsi(data['close'], length=14)
    
    # Step 2: Recalculate VWAP after setting 'date' as the index
    data['VWAP'] = ta.vwap(high=data['high'], low=data['low'], close=data['close'], volume=data['volume'])
    
    # # Calculate Bollinger Bands (returns a DataFrame, not individual series)
    # bbands = ta.bbands(data['close'], length=20, std=2)
     
    # # Extract the individual bands from the result
    # data['BB_upper'] = bbands['BBL_20_2.0']
    # data['BB_middle'] = bbands['BBM_20_2.0']
    # data['BB_lower'] = bbands['BBU_20_2.0']
    data['BB_upper'], data['BB_middle'], data['BB_lower'] = ta.bbands(data['close'], length=20)[['BBU_20_2.0', 'BBM_20_2.0', 'BBL_20_2.0']].T.values

    # 4. Calculate On-Balance Volume (OBV)
    data['OBV'] = ta.obv(data['close'], data['volume'])

    # Default parameters for AF (Acceleration Factor) are 0.02 and maximum AF is 0.2
    # Calculate Parabolic SAR
    # pandas_ta.psar returns multiple columns: `PSARl_0.02_0.2`, `PSARs_0.02_0.2`, and `PSAR_0.02_0.2`
    psar = ta.psar(data['high'], data['low'], data['close'], step=0.02, max_step=0.2)

    # Combine PSARl and PSARs into a single column
    data['PSAR'] = psar['PSARl_0.02_0.2'].combine_first(psar['PSARs_0.02_0.2'])

    # 5. Calculate ATR (Average True Range)
    data['ATR'] = ta.atr(data['high'], data['low'], data['close'], length=14)

    # Calculate Ichimoku Cloud
    ichimoku = ta.ichimoku(data['high'], data['low'], data['close'], window1=9, window2=26, window3=52)
    # Extract Ichimoku components
    data['Tenkan-sen'] = ichimoku[0]['ITS_9']  # Conversion Line
    data['Kijun-sen'] = ichimoku[0]['IKS_26']  # Base Line
    data['Chikou Span'] = ichimoku[0]['ICS_26']  # Lagging Span

    # Calculate CCI Commodity Channel Index (CCI) 
    data['CCI'] = ta.cci(data['high'], data['low'], data['close'], length=20)

    # Reset index to make 'date' a regular column
    data = data.reset_index()

    data.to_csv(new_file, index=False)
    print(f"Technical indicators saved to {new_file}")
    # return data

# Example Usage:
# download_historic_data('AAPL', data_type='test')
# generate_technical_indicators('AAPL', data_type='test')


In [13]:
def git_commit():
    from os import system
        # Push changes using git
    repo_dir = os.getcwd() 
    system(f"cd {repo_dir} && git add .")
    system(f'cd {repo_dir} && git commit -m "Commited via Python"')
    system(f"cd {repo_dir} && git push origin main")
        

In [15]:
download_historic_data('TCS.NS', data_type='test')
generate_technical_indicators('TCS.NS', data_type='test')


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['TCS.NS']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')


Data saved to data\test\raw_data\TCS.NS_data.csv


TypeError: 'NoneType' object is not subscriptable

In [44]:
git_commit()

In [33]:
data = yf.download("MRF.NS", start="2023-12-01", end="2025-02-21") 

[*********************100%***********************]  1 of 1 completed

1 Failed download:
['MRF.NS']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')


In [21]:
stock = yf.Ticker("TCS.NS")

In [27]:
stock = yf.Ticker("TCS.NS")
df = stock.history(period="1d")  # Adjust period as needed
print(df)

$TCS.NS: possibly delisted; no price data found  (period=1d)


Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume]
Index: []


In [35]:
pip install --upgrade yfinance


Collecting yfinance
  Downloading yfinance-0.2.54-py2.py3-none-any.whl.metadata (5.8 kB)
Downloading yfinance-0.2.54-py2.py3-none-any.whl (108 kB)
   ---------------------------------------- 0.0/108.7 kB ? eta -:--:--
   --- ------------------------------------ 10.2/108.7 kB ? eta -:--:--
   --- ------------------------------------ 10.2/108.7 kB ? eta -:--:--
   ----------- --------------------------- 30.7/108.7 kB 217.9 kB/s eta 0:00:01
   ----------- --------------------------- 30.7/108.7 kB 217.9 kB/s eta 0:00:01
   ------------------------- ------------- 71.7/108.7 kB 280.5 kB/s eta 0:00:01
   ------------------------- ------------- 71.7/108.7 kB 280.5 kB/s eta 0:00:01
   -------------------------------------- 108.7/108.7 kB 314.6 kB/s eta 0:00:00
Installing collected packages: yfinance
  Attempting uninstall: yfinance
    Found existing installation: yfinance 0.2.51
    Uninstalling yfinance-0.2.51:
      Successfully uninstalled yfinance-0.2.51
Successfully installed yfinance-0.2

In [36]:
from pandas_datareader import data as pdr
import yfinance as yf

yf.pdr_override()

try:
    df = pdr.get_data_yahoo("TCS.NS", period="1d")
    print(df)
except Exception as e:
    print("Error:", e)


ModuleNotFoundError: No module named 'pandas_datareader'