In [None]:
import requests
import json
import pandas as pd
import os
from datetime import datetime, timedelta
import yfinance as yf
import time
from sqlalchemy import create_engine
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# API key used by every request below; os.getenv returns None when the
# 'api_key' variable is not set.
api_key = os.getenv('api_key')

In [None]:
import os
import requests
import pandas as pd

def get_quarterly_data(url_base, ticker, api_key, period):
    """Fetch one financial statement for ``ticker`` and return it as a DataFrame.

    Args:
        url_base: Endpoint URL without a query string.
        ticker: Symbol sent as the ``symbol`` query parameter.
        api_key: Key sent as the ``apikey`` query parameter.
        period: Value sent as the ``period`` query parameter; it is also
            written into the ``period`` column of the result.

    Returns:
        The response payload as a DataFrame with ``date`` parsed to
        datetimes and ``period`` set to the requested period. An empty
        DataFrame is returned when the request fails, the status is not
        200, or the payload cannot be parsed.
    """
    params = {
        'symbol': ticker,
        'period': period,
        'limit': 20,
        'apikey': api_key,
    }
    try:
        # Let requests build and URL-encode the query string; the timeout
        # keeps a stalled connection from hanging the caller forever.
        response = requests.get(url_base, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Request failed for {url_base} - {period}: {exc}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Request failed for {url_base} - {period}")
        return pd.DataFrame()

    try:
        df = pd.DataFrame(response.json())
        if not df.empty:
            df['date'] = pd.to_datetime(df['date'])
            df['period'] = period
    except ValueError:
        # Raised for a body that is not JSON, for a payload pandas cannot
        # turn into a frame, or for dates that cannot be parsed.
        print(f"JSON decoding failed for {url_base} - {period}")
        return pd.DataFrame()
    except KeyError:
        print(f"Response for {url_base} - {period} has no 'date' column")
        return pd.DataFrame()
    return df


def GetAllFundamentalData(ticker, api_key):
    """Download and merge income, balance-sheet and cash-flow statements.

    For each of the periods Q1-Q4 the three statements are fetched with
    ``get_quarterly_data`` and inner-joined on ``date``. The per-period
    results are concatenated, sorted by date and written to
    ``Fundamentals/<ticker>_all_fundamentals.csv``.

    Args:
        ticker: Symbol to download.
        api_key: API key forwarded to ``get_quarterly_data``.

    Returns:
        The combined DataFrame, or an empty DataFrame when no period
        produced data for all three statements (nothing is written then).
    """
    income_url = "https://financialmodelingprep.com/stable/income-statement"
    balance_url = "https://financialmodelingprep.com/stable/balance-sheet-statement"
    cashflow_url = "https://financialmodelingprep.com/stable/cash-flow-statement"

    merged_frames = []

    for period in ['Q1', 'Q2', 'Q3', 'Q4']:
        income_df = get_quarterly_data(income_url, ticker, api_key, period)
        balance_df = get_quarterly_data(balance_url, ticker, api_key, period)
        cashflow_df = get_quarterly_data(cashflow_url, ticker, api_key, period)

        # A period is only usable when all three statements are present.
        if income_df.empty or balance_df.empty or cashflow_df.empty:
            continue

        # Index every statement by date so the joins match rows on it.
        income_df = income_df.set_index('date')
        balance_df = balance_df.set_index('date')
        cashflow_df = cashflow_df.set_index('date')

        # Inner joins keep only dates present in all three statements;
        # the suffixes disambiguate column names shared between them.
        merged = income_df.join(balance_df, how='inner', lsuffix='_income', rsuffix='_balance')
        merged = merged.join(cashflow_df, how='inner', rsuffix='_cashflow')

        merged = merged.reset_index()
        merged['period'] = period
        merged_frames.append(merged)

    if not merged_frames:
        print("No combined data found for given periods.")
        return pd.DataFrame()

    final_df = pd.concat(merged_frames, ignore_index=True)
    final_df = final_df.sort_values(by='date')

    directory = "Fundamentals"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)
    filename = os.path.join(directory, f"{ticker}_all_fundamentals.csv")
    final_df.to_csv(filename, index=False)
    print(f"Saved combined fundamental data to {filename}")
    return final_df

# Example usage: download the merged fundamentals for one ticker and preview
# the first rows. The API key is re-read from the environment here.
ticker = "AAPL"
api_key = os.getenv("api_key")
df = GetAllFundamentalData(ticker, api_key)
print(df.head())


In [None]:
def get30MinData(apiKey, ticker, end):
    """Download a ticker's 30-minute bars back to its IPO date and store them.

    The history is fetched in windows that walk backwards from ``end``:
    every request asks for the range from the IPO date to the current window
    end, and the next window ends at the earliest date the response
    contained. The same walk is then repeated on the daily endpoint so that
    every daily bar contributes an extra ``16:00:00`` row whose ``open`` is
    the close of that day's ``15:30:00`` bar (when one was downloaded) and
    whose ``close``/``volume`` come from the daily bar. The result is sorted
    and de-duplicated by date and written to the ``<ticker>_30MinData``
    PostgreSQL table.

    Args:
        apiKey: API key sent with every request.
        ticker: Symbol to download.
        end: Last date to request, formatted ``YYYY-MM-DD``.

    Returns:
        The combined DataFrame. It is empty (and nothing is written to the
        database) when no data was downloaded.

    Raises:
        ValueError: If the IPO date cannot be retrieved or a date string
            does not match ``YYYY-MM-DD``.
        RuntimeError: If a price request returns a status other than 200.
    """
    request_timeout = 30  # seconds; keeps a stalled connection from hanging

    def push_to_pgsql(df, ticker):
        """Write ``df`` to the ``<ticker>_30MinData`` table, replacing it."""
        table_name = f"{ticker}_30MinData"

        # Database connection details are read from environment variables.
        db_params = {
            'user': os.getenv('user'),
            'password': os.getenv('password'),
            'host': os.getenv('host'),
            'port': os.getenv('port'),
            'dbname': os.getenv('dbname')
        }

        # SQLAlchemy connection string (using postgresql dialect)
        engine_str = (
            f"postgresql://{db_params['user']}:{db_params['password']}"
            f"@{db_params['host']}:{db_params['port']}/{db_params['dbname']}"
        )

        engine = None
        try:
            # Created inside the try so a bad connection string is reported
            # like any other write failure instead of discarding the
            # downloaded data with an exception.
            engine = create_engine(engine_str)
            df.to_sql(table_name, engine, if_exists='replace', index=False, method='multi')
            print(f"✅ Data successfully written to table '{table_name}' in PostgreSQL.")
        except Exception as e:
            # Persisting is best-effort: the caller still gets the frame.
            print(f"❌ Failed to write data to PostgreSQL: {e}")
        finally:
            if engine is not None:
                engine.dispose()

    def get_ipo_date(ticker, api_key):
        """Return the ``ipoDate`` from the ticker's profile, or None."""
        url = f"https://financialmodelingprep.com/stable/profile?symbol={ticker}&apikey={api_key}"
        response = requests.get(url, timeout=request_timeout)
        if response.status_code == 200:
            try:
                data = response.json()
                ipo_date = data[0].get('ipoDate')
                if ipo_date:
                    return ipo_date
            except (KeyError, IndexError, ValueError):
                print(f"⚠️ Failed to parse IPO date for {ticker}")
        else:
            print(f"⚠️ Failed to fetch IPO info. Status code: {response.status_code}")
        return None

    # Rate limiter state shared by every price request of this call.
    rate_limiter = {
        'start_time': time.time(),
        'call_count': 0
    }

    def rate_limited_get(url, params):
        """Make API calls while respecting the rate limit"""
        # Calculate time since last reset
        elapsed = time.time() - rate_limiter['start_time']

        # Handle rate limiting
        if rate_limiter['call_count'] >= 290:  # Using 290 for safety buffer
            if elapsed < 60:
                # Calculate precise wait time
                wait_time = 60.01 - elapsed  # Add 10ms buffer
                print(f"⏳ API limit reached. Waiting {wait_time:.2f} seconds...")
                time.sleep(wait_time)

            # Reset counter after waiting
            rate_limiter['start_time'] = time.time()
            rate_limiter['call_count'] = 0
            elapsed = 0.0
            print("♻️ Rate limit counter reset")

        # Make the API call
        rate_limiter['call_count'] += 1
        print(f"📞 API call #{rate_limiter['call_count']} (Elapsed: {elapsed:.2f}s)")

        # Log the request without leaking the API key into the output.
        shown = {key: ('***' if key == 'apikey' else value) for key, value in params.items()}
        query = '&'.join(f"{key}={value}" for key, value in shown.items())
        print(f"🔗 Request URL: {url}?{query}")

        # requests builds and URL-encodes the real query string.
        return requests.get(url, params=params, timeout=request_timeout)

    def pullData(ticker, apiKey, start, end, period='30min'):
        """Fetch one window of bars; return ``(frame, earliest date in it)``.

        When the response holds no rows, ``(empty frame, start)`` is
        returned so the caller's backwards walk terminates.
        """
        if period == '30min':
            base_url = "https://financialmodelingprep.com/stable/historical-chart/30min"
        elif period == 'daily':
            base_url = "https://financialmodelingprep.com/stable/historical-price-eod/full"
        else:
            raise ValueError("Invalid period. Use '30min' or 'daily'.")

        params = {
            'to': end,
            'from': start,
            'symbol': ticker,
            'apikey': apiKey,
        }

        response = rate_limited_get(base_url, params=params)

        # Stop on a failed request instead of trying to parse an error body.
        if response.status_code != 200:
            print(f"⚠️ Error {response.status_code} for {start} to {end}")
            raise RuntimeError(
                f"Request for {ticker} ({period}) from {start} to {end} "
                f"failed with status {response.status_code}"
            )
        print(f"📥 Downloaded data for {ticker} from {start} to {end} ({period})")

        df = pd.DataFrame(response.json())
        if df.empty:
            print(f"No data found for {ticker} from {start} to {end}.")
            return pd.DataFrame(), start

        earliestDate = df['date'].min()
        latestDate = df['date'].max()
        print(f"📅 Earliest date: {earliestDate}, Latest date: {latestDate}")

        # dropping the high and low columns if they exist
        if 'high' in df.columns and 'low' in df.columns:
            df = df.drop(columns=['high', 'low'])
        # drop 'change', 'changePercent' and 'vwap' columns if they exist
        if 'change' in df.columns and 'changePercent' in df.columns and 'vwap' in df.columns:
            df = df.drop(columns=['change', 'changePercent', 'vwap'])
            print("Dropped 'change', 'changePercent', and 'vwap' columns.")
            print("the length of the dataframe is: ", len(df))

        # 30-minute timestamps carry a time part; keep only the date.
        if period == '30min':
            earliestDate = earliestDate.split(' ')[0]
        # Round-trip through datetime to validate the YYYY-MM-DD format.
        earliestDate = datetime.strptime(earliestDate, '%Y-%m-%d').strftime('%Y-%m-%d')

        return df, earliestDate

    def addLastEntry(df, current):
        """Append one ``16:00:00`` row to ``df`` for every daily bar in ``current``."""
        if current.empty:
            return df

        # Close of each 15:30 bar keyed by its timestamp string; the first
        # occurrence wins, matching a first-match lookup on the frame.
        closes_at_1530 = {}
        if 'date' in df.columns and 'close' in df.columns:
            for stamp, close in zip(df['date'], df['close']):
                if isinstance(stamp, str) and stamp.endswith(' 15:30:00'):
                    closes_at_1530.setdefault(stamp, close)

        new_rows = []
        for date_str, close, volume in zip(current['date'], current['close'], current['volume']):
            day = date_str.split(' ')[0]
            closing_stamp = datetime.strptime(day, '%Y-%m-%d') + timedelta(hours=16)
            new_rows.append({
                'date': closing_stamp.strftime('%Y-%m-%d %H:%M:%S'),
                # None when no 15:30 bar was downloaded for that day.
                'open': closes_at_1530.get(f"{day} 15:30:00"),
                'close': close,
                'volume': volume,
            })

        # Build all rows first and concatenate once; appending row by row
        # copies the whole frame on every iteration.
        return pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

    def previous_window_end(earliest, window_end):
        """Return the end date of the next (older) request window.

        When the response reached no further back than the window end
        itself, step back one day so the walk always makes progress.
        """
        if earliest >= window_end:
            day_before = datetime.strptime(window_end, '%Y-%m-%d') - timedelta(days=1)
            return day_before.strftime('%Y-%m-%d')
        return earliest

    # Use the IPO date as the start of the requested range.
    ipoDate = get_ipo_date(ticker, apiKey)
    if ipoDate is None:
        raise ValueError(f"Could not retrieve IPO date for {ticker}")

    # Normalise both bounds to YYYY-MM-DD strings; ISO dates compare
    # chronologically as plain strings.
    start = datetime.strptime(ipoDate, '%Y-%m-%d').strftime('%Y-%m-%d')
    end = datetime.strptime(end, '%Y-%m-%d').strftime('%Y-%m-%d')

    print(f"Getting data for {ticker} from {start} to {end}")

    # Walk the 30-minute endpoint backwards from `end` to the IPO date.
    frames = []
    currentEnd = end
    while currentEnd > start:
        currentData, earliestDate = pullData(ticker, apiKey, start=start, end=currentEnd, period='30min')
        print(f"Length of currentData: {len(currentData)}")
        currentEnd = previous_window_end(earliestDate, currentEnd)
        print("the current end is: ", currentEnd)
        if not currentData.empty:
            frames.append(currentData)
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Walk the daily endpoint the same way to add the 16:00 closing rows.
    currentEnd = end
    while currentEnd > start:
        currentData, earliestDate = pullData(ticker, apiKey, start=start, end=currentEnd, period='daily')
        currentEnd = previous_window_end(earliestDate, currentEnd)
        print("the current end is: ", currentEnd)
        df = addLastEntry(df, currentData)

    if df.empty:
        # Nothing to sort or store; callers test `.empty` on the result.
        print(f"No data found for {ticker} from {start} to {end}.")
        return df

    # sort the dataframe by date and remove duplicates based on date
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values(by='date').drop_duplicates(subset='date')
    # push it to the postgres database
    push_to_pgsql(df, ticker)
    return df
    

In [None]:
# Download the 30-minute history for a single ticker up to end_date.
# end_date is also read by the batch download cell further down.
ticker = "AAPL"
end_date='2025-05-31'

data = get30MinData(api_key, ticker,end_date)



# Save to CSV if data was retrieved
if not data.empty:
    fileName = f"{ticker}_30Min_data.csv"
    data.to_csv(fileName, index=False)
    print(f"Data saved to {fileName}")

In [None]:
# Stock-list endpoint. The f-prefix is required so the API key is
# interpolated; without it the literal text "{api_key}" is sent as the key.
base_url = f"https://financialmodelingprep.com/stable/stock-list?apikey={api_key}"

def get_stock_list(base_url):
    """Request ``base_url`` and return its JSON payload as a DataFrame.

    An empty DataFrame is returned when the status code is not 200 or the
    payload cannot be decoded into a frame.
    """
    resp = requests.get(base_url)

    # Guard clause: bail out early on any non-200 status.
    if resp.status_code != 200:
        print(f"Request failed with status code {resp.status_code}")
        return pd.DataFrame()

    try:
        payload = resp.json()
        frame = pd.DataFrame(payload)
    except ValueError:
        print("JSON decoding failed")
        frame = pd.DataFrame()
    return frame
    
# Example usage
stock_list_df = get_stock_list(base_url)

# Drop every row whose symbol, and then whose companyName, contains "ETF"
# (missing values are treated as not containing it).
stock_list_df = (
    stock_list_df
    .loc[lambda frame: ~frame['symbol'].str.contains('ETF', na=False)]
    .loc[lambda frame: ~frame['companyName'].str.contains('ETF', na=False)]
)

print(len(stock_list_df), " stocks found.")



In [None]:
# Company-screener endpoint. The f-prefix is required so the API key is
# interpolated; without it the literal text "{api_key}" is sent as the key.
baseurl = f"https://financialmodelingprep.com/stable/company-screener?country=US&isEtf=false&isFund=false&isActivelyTrading=true&apikey={api_key}"

def get_company_screener(baseurl):
    """Request ``baseurl`` and return its JSON payload as a DataFrame.

    An empty DataFrame is returned when the status code is not 200 or the
    payload cannot be decoded into a frame.
    """
    reply = requests.get(baseurl)
    succeeded = reply.status_code == 200

    if not succeeded:
        print(f"Request failed with status code {reply.status_code}")
        return pd.DataFrame()

    try:
        return pd.DataFrame(reply.json())
    except ValueError:
        print("JSON decoding failed")
        return pd.DataFrame()
    
# Example usage
company_screener_df = get_company_screener(baseurl)
print(len(company_screener_df), " companies found in the screener.")

# Drop every row whose symbol, and then whose companyName, contains "ETF"
company_screener_df = (
    company_screener_df
    .loc[lambda frame: ~frame['symbol'].str.contains('ETF', na=False)]
    .loc[lambda frame: ~frame['companyName'].str.contains('ETF', na=False)]
)
print(len(company_screener_df), " companies found in the screener.")
print(company_screener_df.columns)

# Copy with marketCap coerced to numbers, largest first
sorted_company_screener_df = company_screener_df.assign(
    marketCap=pd.to_numeric(company_screener_df['marketCap'], errors='coerce')
).sort_values(by='marketCap', ascending=False)

# Copy with price coerced to numbers, highest first
sorted_company_screener_df_price = company_screener_df.assign(
    price=pd.to_numeric(company_screener_df['price'], errors='coerce')
).sort_values(by='price', ascending=False)

print(len(company_screener_df), " companies found.")

# Write the unsorted frame and both sorted variants to CSV
company_screener_df.to_csv("company_screener.csv", index=False)
sorted_company_screener_df.to_csv("company_screener_sorted_by_marketCap.csv", index=False)
sorted_company_screener_df_price.to_csv("company_screener_sorted_by_price.csv", index=False)





In [None]:
# Download 30-minute data for screener tickers in two passes: first in
# descending marketCap order, then in ascending order. Each pass stops once
# its counter exceeds max_ticker_count; tickers whose CSV already exists are
# skipped but still counted.
max_ticker_count = 100

# Same rows as company_screener_df, sorted by marketCap ascending.
reverse_sorted_company_screener_df = company_screener_df.copy()
reverse_sorted_company_screener_df['marketCap'] = pd.to_numeric(
    reverse_sorted_company_screener_df['marketCap'], errors='coerce'
)
reverse_sorted_company_screener_df.sort_values(by='marketCap', ascending=True, inplace=True)

for screener_df in (sorted_company_screener_df, reverse_sorted_company_screener_df):
    # Every pass counts from zero.
    tickerCounter = 0
    for index, row in screener_df.iterrows():
        # A single pass with an explicit stop: the frame is never re-scanned
        # and the pass cannot loop forever when few downloads succeed.
        if tickerCounter > max_ticker_count:
            break

        print(f"Ticker Counter: {tickerCounter}")
        ticker = row['symbol']
        # check if the ticker is already in the file
        fileName = f"{ticker}_30Min_data.csv"
        if os.path.exists(fileName):
            print(f"Data for {ticker} already exists in {fileName}. Skipping...")
            tickerCounter += 1
            continue
        print(f"Getting data for {ticker}...")
        try:
            df = get30MinData(api_key, ticker, end_date)
            if not df.empty:
                df.to_csv(fileName, index=False)
                print(f"Data saved to {fileName}")
                tickerCounter += 1
            else:
                print(f"No data found for {ticker}.")
        except Exception as e:
            # Best-effort batch: report the failure and move to the next ticker.
            print(f"Error fetching data for {ticker}: {e}")
            
