In [66]:
#https://www.nasdaq.com/market-activity/stocks/screener
import pandas as pd

# Load the CSV file into a DataFrame
df = pd.read_csv('tickers.csv')

# Extract the 'Symbol' column into a list
ticker_symbols = df['Symbol'].tolist()


In [None]:
import os
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta

# Directory to save the data
data_dir = "ticker_data"
os.makedirs(data_dir, exist_ok=True)

# Function to fetch and save data for a list of tickers
def fetch_and_save_data(tickers):
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365*10)
    
    try:
        #print(f"Fetching data for tickers: {tickers}")
        data = yf.download(tickers, start=start_date, end=end_date, group_by='ticker')
        
        for ticker in tickers:
            ticker_data = data[ticker]
            if not ticker_data.empty:
                file_path = os.path.join(data_dir, f"{ticker}.csv")
                ticker_data.to_csv(file_path)
                print(f"Data for {ticker} saved successfully.")
            else:
                print(f"No data found for {ticker}.")
    except Exception as e:
        print(f"Error fetching data for tickers: {e}")

# Function to split the list into chunks of specified size
def chunk_list(lst, chunk_size):
    for i in range(0, len(lst), chunk_size):
        yield lst[i:i + chunk_size]

# Process tickers in chunks of 500
chunk_size = 500
for ticker_chunk in chunk_list(ticker_symbols, chunk_size):
    fetch_and_save_data(ticker_chunk)

print("Data fetching completed.")

In [6]:
import os
import pandas as pd

def load_and_resample_ticker_data():
    ticker_data_dir = 'ticker_data'
    weekly_data_dir = 'weekly_data'
    
    if not os.path.exists(weekly_data_dir):
        os.makedirs(weekly_data_dir)
    
    # List all CSV files in the ticker_data directory
    ticker_files = [f for f in os.listdir(ticker_data_dir) if f.endswith('.csv')]
    
    # Dictionary to store dataframes for each ticker
    ticker_dataframes = {}
    
    for file in ticker_files:
        # Extract the ticker symbol from the file name (assuming it's the file name without extension)
        ticker_symbol = os.path.splitext(file)[0]
        
        # Load the CSV file into a DataFrame
        df = pd.read_csv(os.path.join(ticker_data_dir, file))
        
        # Ensure the date column is in datetime format
        df['date'] = pd.to_datetime(df['date'])
        
        # Set the date column as the index
        df.set_index('date', inplace=True)
        
        # Store the DataFrame in the dictionary
        ticker_dataframes[ticker_symbol] = df

    # Concatenate all dataframes along the columns axis, aligning on the date index
    combined_df = pd.concat(ticker_dataframes.values(), axis=1, keys=ticker_dataframes.keys())
    
    # Select only numeric columns for resampling
    numeric_combined_df = combined_df.select_dtypes(include='number')
    
    # Resample the combined dataframe to weekly frequency, using the mean for example
    weekly_combined_df = numeric_combined_df.resample('W').mean()
    
    # Iterate over each ticker to save the resampled data back to individual files
    for ticker_symbol in ticker_dataframes.keys():
        # Extract the ticker's resampled data
        ticker_weekly_df = weekly_combined_df[ticker_symbol]
        
        # Reset the index to have the date as a column again
        ticker_weekly_df.reset_index(inplace=True)
        
        # Write the weekly data to a new CSV file in the weekly_data directory
        ticker_weekly_df.to_csv(os.path.join(weekly_data_dir, f'{ticker_symbol}_weekly.csv'), index=False)

# Call the function to execute the task
load_and_resample_ticker_data()


In [60]:
import os
import pandas as pd

def calculate_rsi(df, column='Close', period=14):
    delta = df[column].diff()
    gain = delta.clip(lower=0).rolling(window=period, min_periods=1).mean()
    loss = -delta.clip(upper=0).rolling(window=period, min_periods=1).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calculate_mfi(df, period=14):
    typical_price = (df['High'] + df['Low'] + df['Close']) / 3
    money_fLow = typical_price * df['Volume']
    positive_fLow = money_fLow.where(typical_price > typical_price.shift(1), 0).rolling(window=period, min_periods=1).sum()
    negative_fLow = money_fLow.where(typical_price < typical_price.shift(1), 0).rolling(window=period, min_periods=1).sum()
    mfi = 100 - (100 / (1 + positive_fLow / negative_fLow))
    return mfi

def calculate_ultimate_oscillator(df, short_period=7, mid_period=14, long_period=28):
    # Ensure the DataFrame is sorted by date
    df = df.sort_index()

    # Calculate Buying Pressure (BP) and True Range (TR)
    df['Prior Close'] = df['Close'].shift(1)
    df['BP'] = df['Close'] - df[['Low', 'Prior Close']].min(axis=1)
    df['TR'] = df[['High', 'Prior Close']].max(axis=1) - df[['Low', 'Prior Close']].min(axis=1)
    
    # Calculate average BP and TR for each period
    df['Avg7_BP'] = df['BP'].rolling(window=short_period).sum()
    df['Avg7_TR'] = df['TR'].rolling(window=short_period).sum()
    df['Avg14_BP'] = df['BP'].rolling(window=mid_period).sum()
    df['Avg14_TR'] = df['TR'].rolling(window=mid_period).sum()
    df['Avg28_BP'] = df['BP'].rolling(window=long_period).sum()
    df['Avg28_TR'] = df['TR'].rolling(window=long_period).sum()
    
    # Calculate raw UO components
    df['R1'] = df['Avg7_BP'] / df['Avg7_TR']
    df['R2'] = df['Avg14_BP'] / df['Avg14_TR']
    df['R3'] = df['Avg28_BP'] / df['Avg28_TR']
    
    # Calculate Ultimate Oscillator
    df['Ultimate Oscillator'] = 100 * (4 * df['R1'] + 2 * df['R2'] + df['R3']) / (4 + 2 + 1)
    
    return df['Ultimate Oscillator']

def calculate_obv(df):
    # Calculate daily returns
    df['Daily Return'] = df['Close'].diff()
    
    # Calculate the direction of the volume flow
    df['Direction'] = 0
    df.loc[df['Daily Return'] > 0, 'Direction'] = 1
    df.loc[df['Daily Return'] < 0, 'Direction'] = -1
    
    # Calculate OBV
    df['OBV'] = (df['Volume'] * df['Direction']).cumsum()
    
    
    return df['OBV']
    
def calculate_close_open_avg_volume(df):
    # Calculate the average of 'Close' and 'Open' prices
    df['Close_Open_Avg'] = (df['Close'] + df['Open']) / 2
    
    # Multiply the average with 'Volume'
    df['Close_Open_Avg_Volume'] = df['Close_Open_Avg'] * df['Volume']
    
    return df['Close_Open_Avg_Volume']
    
def add_technical_indicators():
    weekly_data_dir = 'weekly_data'
    ta_data_dir = 'ta_data'
    
    if not os.path.exists(ta_data_dir):
        os.makedirs(ta_data_dir)
    
    # List all CSV files in the weekly_data directory
    ticker_files = [f for f in os.listdir(weekly_data_dir) if f.endswith('.csv')]
    
    for file in ticker_files:
        # Load the CSV file into a DataFrame
        df = pd.read_csv(os.path.join(weekly_data_dir, file))
        
        if df.empty:
            continue
        
        # Ensure the date column is in datetime format
        df['date'] = pd.to_datetime(df['date'])
        
        # Sort the dataframe by date
        df.sort_values(by='date', inplace=True)
        
        # Calculate technical indicators
        df['RSI'] = calculate_rsi(df)
        df['MFI'] = calculate_mfi(df)
        df['ULTOSC'] = calculate_ultimate_oscillator(df)
        df['OBV'] = calculate_obv(df)
        df['MON'] = calculate_close_open_avg_volume(df)
        
        # Remove any rows with NaN values introduced by the indicators
        df.dropna(inplace=True)
        
        # Save the updated DataFrame to the ta_data directory
        df.to_csv(os.path.join(ta_data_dir, file), index=False)

# Call the function to execute the task
add_technical_indicators()


In [62]:
import os
import pandas as pd

def load_ta_data():
    ta_data_dir = 'ta_data'
    
    # List all CSV files in the ta_data directory
    ticker_files = [f for f in os.listdir(ta_data_dir) if f.endswith('.csv')]
    
    # List to store DataFrames
    dataframes = []
    
    for file in ticker_files:
        # Extract the ticker symbol from the file name (assuming it's the file name without extension)
        ticker_symbol = os.path.splitext(file)[0].split('_')[0]
        # Load the CSV file into a DataFrame
        df = pd.read_csv(os.path.join(ta_data_dir, file))
        
        if df.empty:
            continue
        
        # Add a column for the ticker symbol
        df['ticker'] = ticker_symbol
        
        # Remove rows where 'MON' column is less than 1 million
        df = df[df['MON'] >= 1_000_000]   
        
        # Append the DataFrame to the list
        dataframes.append(df)
    
    # Concatenate all DataFrames
    combined_df = pd.concat(dataframes)
    
    # Ensure the date column is in datetime format
    combined_df['date'] = pd.to_datetime(combined_df['date'])
    
    # Set the date column as the index
    combined_df.set_index('date', inplace=True)
    
    return combined_df


# Load the data
combined_df = load_ta_data()
print(combined_df)



                 Open     High        Low      Close  Adj Close      Volume  \
date                                                                          
2016-03-27   5.280000   5.4150   4.990000   5.107500   0.062033    312025.0   
2018-08-19   6.416000   6.6640   6.356000   6.570000   0.210416    216060.0   
2018-09-02   1.980000   2.7500   1.768000   2.262000   2.262000   5711040.0   
2019-04-14   2.116000   2.8860   1.948000   2.508000   2.508000  22679300.0   
2019-04-21   2.760000   3.0400   2.632500   2.852500   2.852500   2961850.0   
...               ...      ...        ...        ...        ...         ...   
2024-05-12  42.128001  42.9470  41.620799  42.382001  42.382001   4026700.0   
2024-05-19  44.384000  45.0490  43.564000  44.304000  44.304000   3761720.0   
2024-05-26  42.140000  42.3770  41.230000  41.314000  41.314000   2514900.0   
2024-06-02  39.572499  40.3325  39.316250  39.797500  39.797500   3223475.0   
2024-06-09  41.806000  42.6716  41.508000  42.346000

In [65]:
import pandas as pd

# Assuming combined_df is already defined and contains the necessary data
# combined_df = ...

# Step 0: Reset index to make 'date' a column
combined_df = combined_df.reset_index()

# Step 1: Sort the DataFrame based on the 'RSI' column for each date in descending order
combined_df = combined_df.sort_values(by=['date', 'MON'], ascending=[True, False])

# Step 2: Select the top n tickers with the highest RSI score for each date
n = 5  # For example, we can set n to 5
top_n_df = combined_df.groupby('date').head(n).copy()

# Step 3: Calculate the percentage change in the 'Close' column for the selected tickers between the current week and the next week
# Create a new column for the percentage change
top_n_df.loc[:, 'pct_change'] = top_n_df.groupby('ticker')['Close'].pct_change(periods=1) * 100

# Step 4: Summarize the results
summary = top_n_df.dropna(subset=['pct_change'])[['date', 'ticker', 'pct_change']]
print(summary)

# Additionally, you can group by date to see the average percentage change for each date
date_summary = summary.groupby('date')['pct_change'].mean().reset_index()
print(date_summary)

# Step 5: Save the summary to a CSV file
date_summary.to_csv('summary.csv', index=False)

             date ticker  pct_change
1549   2014-12-28   AAPL    2.829707
2974   2014-12-28    XOM    4.810120
2978   2014-12-28  GOOGL    5.157897
2993   2015-01-04   AAPL   -1.182778
4233   2015-01-04   GILD    3.897343
...           ...    ...         ...
982025 2024-06-09   NVDA    6.182654
982026 2024-06-09   TSLA   -0.651724
982027 2024-06-09   AAPL    2.183237
982032 2024-06-09    GME   -4.619582
982029 2024-06-09  GOOGL   -0.696839

[2395 rows x 3 columns]
          date  pct_change
0   2014-12-28    4.265908
1   2015-01-04    1.248673
2   2015-01-11   -2.105625
3   2015-01-18   -2.170253
4   2015-01-25    1.869895
..         ...         ...
489 2024-05-12    1.756213
490 2024-05-19   -7.442087
491 2024-05-26    3.018893
492 2024-06-02    2.726708
493 2024-06-09    0.479549

[494 rows x 2 columns]
