In [None]:
import sys
import os
import time

import numpy as np
import pandas as pd
import sklearn
import requests

from matplotlib import pyplot as plt
plt.rcParams['font.size'] = 16

# Report the interpreter and library versions this notebook is running against
print('------------')
print(f'### Python version: {sys.version}')
print(f'### NumPy version: {np.__version__}')
print(f'### Scikit-learn version: {sklearn.__version__}')
print('------------')

def var_exists(var_name):
    """Return True if a module-level (notebook-global) variable named `var_name` exists.

    Args:
        var_name: Name of the variable to look up, as a string.

    Returns:
        bool: True when `var_name` is a key of this module's `globals()`.

    Only globals are consulted: calling `locals()` in here would return this
    function's own namespace (which holds nothing but the `var_name` parameter),
    not the caller's, so it can never answer the question being asked.
    """
    return var_name in globals()

In [None]:
# Get tech tickers
# The timeout keeps a stalled connection from hanging the cell, and
# raise_for_status() turns an HTTP error into an immediate, readable exception
# instead of a malformed DataFrame further down.
r = requests.get('https://swamplocksapi.azurewebsites.net/api/Financials/stocks', timeout=30)
r.raise_for_status()
df = pd.DataFrame(r.json())

# Upper bound on how many rows of the stock list are used. Slicing (rather than
# indexing row by row) also copes with the API returning fewer rows than this.
MAX_TICKERS = 463

# Column 0 of the stock list is collected as the ticker symbol.
# The sector filter (column 1 == 'Information Technology') is currently disabled,
# so every listed stock is included despite the variable name.
techTickers = df.iloc[:MAX_TICKERS, 0].tolist()

In [None]:
# AAPL specific prediction from here down.
# Load AAPL's data
#
# Every call uses a timeout so a stalled connection cannot hang the cell, and
# raise_for_status() so an HTTP error is reported here rather than surfacing
# later as a confusing DataFrame/KeyError.
API_TIMEOUT_SECONDS = 30

# AAPL closing price stored in dfAAPLclose, ordered by date
r = requests.get('https://swamplocksapi.azurewebsites.net/api/Financials/stocks/AAPL/filtered_data',
                 timeout=API_TIMEOUT_SECONDS)
r.raise_for_status()
dfAAPLclose = pd.DataFrame(r.json())
dfAAPLclose['date'] = pd.to_datetime(dfAAPLclose['date'])
dfAAPLclose = dfAAPLclose.sort_values('date')

# 'dfbs' stores AAPL's data from the balance sheet
aaplbsURL = 'https://swamplocksapi.azurewebsites.net/api/Financials/balancesheets/AAPL'
r = requests.get(aaplbsURL, timeout=API_TIMEOUT_SECONDS)
r.raise_for_status()
dfbs = pd.DataFrame(r.json())

# 'dfcf' stores AAPL's data from cash flow statement
aaplcfURL = 'https://swamplocksapi.azurewebsites.net/api/Financials/cashflowstatements/AAPL'
r = requests.get(aaplcfURL, timeout=API_TIMEOUT_SECONDS)
r.raise_for_status()
dfcf = pd.DataFrame(r.json())

# 'dfe' stores AAPL's data from earnings statement
aapleURL = 'https://swamplocksapi.azurewebsites.net/api/Financials/earnings/AAPL'
r = requests.get(aapleURL, timeout=API_TIMEOUT_SECONDS)
r.raise_for_status()
dfe = pd.DataFrame(r.json())

# 'dfis' stores AAPL's data from income statement
aaplisURL = 'https://swamplocksapi.azurewebsites.net/api/Financials/incomestatements/AAPL'
r = requests.get(aaplisURL, timeout=API_TIMEOUT_SECONDS)
r.raise_for_status()
dfis = pd.DataFrame(r.json())

In [None]:
# Independent copy of the two columns needed for the price join; a later cell
# assigns into this frame, which would otherwise write into a selection of
# dfAAPLclose and trigger pandas' SettingWithCopyWarning.
df_closing_price = dfAAPLclose[['date', 'closingPrice']].copy()

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
# Build the modelling table: one row per fiscal quarter, pairing that quarter's
# fundamentals with the closing price found about one quarter later.

# Ensure datetime consistency
dfe['fiscalDateEnding'] = pd.to_datetime(dfe['fiscalDateEnding'])
dfcf['fiscalDateEnding'] = pd.to_datetime(dfcf['fiscalDateEnding'])
dfis['fiscalDateEnding'] = pd.to_datetime(dfis['fiscalDateEnding'])
# df_closing_price originates from a column selection of dfAAPLclose; assign()
# returns a new frame, so nothing is written into that selection.
df_closing_price = df_closing_price.assign(date=pd.to_datetime(df_closing_price['date']))

# Create next quarter end date (fiscal quarter end + 3 months)
dfe['nextQuarterEnd'] = dfe['fiscalDateEnding'] + pd.DateOffset(months=3)
dfcf['nextQuarterEnd'] = dfcf['fiscalDateEnding'] + pd.DateOffset(months=3)
dfis['nextQuarterEnd'] = dfis['fiscalDateEnding'] + pd.DateOffset(months=3)

# Sort by date (merge_asof below requires both sides sorted on their keys)
dfe = dfe.sort_values('nextQuarterEnd')
dfcf = dfcf.sort_values('nextQuarterEnd')
dfis = dfis.sort_values('nextQuarterEnd')
df_closing_price = df_closing_price.sort_values('date')

# Earnings + cash flow, keeping only quarters present in both statements
merged_financials = pd.merge(
    dfe[['ticker', 'fiscalDateEnding', 'nextQuarterEnd', 'reportedEPS']],
    dfcf[['ticker', 'fiscalDateEnding', 'netIncome', 'paymentsForRepurchaseOfCommonStock', 'profitLoss']],
    on=['ticker', 'fiscalDateEnding'],
    how='inner'
)

# now merge the result with dfis
merged_financials = pd.merge(
    merged_financials,
    dfis[['ticker', 'fiscalDateEnding', 'researchAndDevelopment', 'operatingExpenses', 'sellingGeneralAndAdministrative']],
    on=['ticker', 'fiscalDateEnding'],
    how='inner'
)

# Merge with closing price using merge_asof: for each quarter take the first
# closing price dated on or after nextQuarterEnd, at most 30 days later
final_data = pd.merge_asof(
    merged_financials,
    df_closing_price,
    left_on='nextQuarterEnd',
    right_on='date',
    direction='forward',
    tolerance=pd.Timedelta("30 days")
)

# Drop quarters for which no closing price was found within the tolerance
final_data = final_data.dropna(subset=['closingPrice']).reset_index(drop=True)

# Figures noted for the last three features (their meaning is not recorded
# here; presumably correlation with closingPrice, TODO confirm):
#   researchAndDevelopment               0.953399
#   operatingExpenses                    0.935023
#   sellingGeneralAndAdministrative      0.891001

# Define features (X) and target (y)
# (other column names noted here previously: cashFlowFromFinancing, paymentsForRepurchaseOfEquity)
features = ['reportedEPS', 'netIncome', 'paymentsForRepurchaseOfCommonStock', 
            'profitLoss',
            'researchAndDevelopment', 'operatingExpenses', 'sellingGeneralAndAdministrative']
X = final_data[features]
y = final_data['closingPrice']

# Verify shapes
print("X shape:", X.shape)
print("y shape:", y.shape)

# Split data: 60% train, then the remaining 40% halved into validation and test.
# NOTE(review): train_test_split shuffles rows by default, so quarters from all
# periods are mixed across the three sets; confirm that is intended for
# time-ordered data.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Display shapes (the second dimension of X equals len(features))
print(f'Training set: {X_train.shape}, {y_train.shape}')
print(f'Validation set: {X_val.shape}, {y_val.shape}')
print(f'Test set: {X_test.shape}, {y_test.shape}')

In [None]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import joblib

# The model is trained on log1p(price); predictions are mapped back with expm1
y_log = np.log1p(y)

# 60 / 20 / 20 split into train / validation / test (fixed seed)
X_train, X_temp, y_train_log, y_temp_log = train_test_split(
    X, y_log, test_size=0.4, random_state=42
)
X_val, X_test, y_val_log, y_test_log = train_test_split(
    X_temp, y_temp_log, test_size=0.5, random_state=42
)

# Standardise features using statistics from the training rows only
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Hyper-parameter grid for an RBF-kernel SVR, scored by (negative) MSE over 5 folds
param_grid = dict(
    C=[10, 100, 1000],
    gamma=[0.01, 0.1, 1],
    kernel=['rbf'],
)
svm = SVR()
grid_search = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search.fit(X_train_scaled, y_train_log)

# Estimator refitted with the best parameter combination
best_svm = grid_search.best_estimator_
print("Best params:", grid_search.best_params_)

# Errors in log space
y_val_pred_log = best_svm.predict(X_val_scaled)
y_test_pred_log = best_svm.predict(X_test_scaled)
val_mse_log = mean_squared_error(y_val_log, y_val_pred_log)
test_mse_log = mean_squared_error(y_test_log, y_test_pred_log)
print(f"Validation MSE (log scale): {val_mse_log}")
print(f"Test MSE (log scale): {test_mse_log}")

# Errors in price units, after inverting the log1p transform on both sides
y_val_pred = np.expm1(y_val_pred_log)
y_test_pred = np.expm1(y_test_pred_log)
val_mse = mean_squared_error(np.expm1(y_val_log), y_val_pred)
test_mse = mean_squared_error(np.expm1(y_test_log), y_test_pred)
print(f"Validation MSE (original scale): {val_mse}")
print(f"Test MSE (original scale): {test_mse}")

# Persist everything needed to reproduce a prediction under Models/<ticker>/
ticker = "AAPL"
model_dir = os.path.join('Models', ticker)
os.makedirs(model_dir, exist_ok=True)

# Fitted model
joblib.dump(best_svm, os.path.join(model_dir, 'best_svm_model.pkl'))

# Fitted scaler
joblib.dump(scaler, os.path.join(model_dir, 'scaler.pkl'))

# Model input: the last row of X, repeated once per future quarter
latest_data = X.iloc[-1].to_frame().T
future_quarters = 1
future_X = pd.DataFrame(
    [latest_data.values[0] for _ in range(future_quarters)],
    columns=features,
)

joblib.dump(future_X, os.path.join(model_dir, f'{ticker}_latest_features.pkl'))

In [None]:

# Last row of X as a one-row DataFrame
latest_data = X.iloc[-1].to_frame().T

# Number of future quarters to project; each one reuses that last row as input
future_quarters = 1
future_X = pd.DataFrame(
    [latest_data.values[0] for _ in range(future_quarters)],
    columns=features,
)

# Standardise with the fitted scaler, predict in log space, then invert log1p
future_X_scaled = scaler.transform(future_X)
future_log_preds = best_svm.predict(future_X_scaled)
future_cp_preds = np.expm1(future_log_preds)

In [None]:
# Predicted price, last quarter's price
lastQuartersPrice = y.iloc[-1]
predictedQuartersPrice = future_cp_preds[0]
print('Projected price next quarter:' , predictedQuartersPrice , 'Last quarter\'s price:' , lastQuartersPrice)
# Percent change is measured relative to the starting (last quarter's) price:
# (new - old) / old * 100. Dividing by the predicted price instead would
# understate gains and overstate losses.
print('Projected percent change by next quarter:' , (predictedQuartersPrice / lastQuartersPrice - 1) * 100)

In [None]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# Tickers whose pipeline raised an exception; printed after the loop.
failed = []
# Switch: set to 0 to skip the per-ticker training loop below.
loop = 1
# Seconds allowed for each API call. Without a timeout a single stalled
# connection would block the whole multi-ticker run indefinitely.
API_TIMEOUT_SECONDS = 30
if loop:
    for t in techTickers:
        try:
            print(t)
            # NOTE: names such as dfAAPLclose / aaplbsURL are carried over from the
            # AAPL-only cells; inside this loop they hold the data for ticker `t`.
            # Each response is checked with raise_for_status() so an HTTP error is
            # reported as such in the `failed` log instead of as an obscure
            # DataFrame/KeyError further down.

            # Closing-price history for ticker t, ordered by date
            r = requests.get('https://swamplocksapi.azurewebsites.net/api/Financials/stocks/' + t + '/filtered_data',
                             timeout=API_TIMEOUT_SECONDS)
            r.raise_for_status()
            dfAAPLclose = pd.DataFrame(r.json())
            dfAAPLclose['date'] = pd.to_datetime(dfAAPLclose['date'])
            dfAAPLclose = dfAAPLclose.sort_values('date')

            # 'dfbs' stores the balance sheet (fetched, but not used by the model below)
            aaplbsURL = 'https://swamplocksapi.azurewebsites.net/api/Financials/balancesheets/' + t
            r = requests.get(aaplbsURL, timeout=API_TIMEOUT_SECONDS)
            r.raise_for_status()
            dfbs = pd.DataFrame(r.json())

            # 'dfcf' stores the cash flow statement
            aaplcfURL = 'https://swamplocksapi.azurewebsites.net/api/Financials/cashflowstatements/' + t
            r = requests.get(aaplcfURL, timeout=API_TIMEOUT_SECONDS)
            r.raise_for_status()
            dfcf = pd.DataFrame(r.json())

            # 'dfe' stores the earnings statement
            aapleURL = 'https://swamplocksapi.azurewebsites.net/api/Financials/earnings/' + t
            r = requests.get(aapleURL, timeout=API_TIMEOUT_SECONDS)
            r.raise_for_status()
            dfe = pd.DataFrame(r.json())

            # 'dfis' stores the income statement
            aaplisURL = 'https://swamplocksapi.azurewebsites.net/api/Financials/incomestatements/' + t
            r = requests.get(aaplisURL, timeout=API_TIMEOUT_SECONDS)
            r.raise_for_status()
            dfis = pd.DataFrame(r.json())

            # Explicit copy so the date assignment below writes to an independent
            # frame rather than into a selection of dfAAPLclose
            df_closing_price = dfAAPLclose[['date', 'closingPrice']].copy()

            # Ensure datetime consistency
            dfe['fiscalDateEnding'] = pd.to_datetime(dfe['fiscalDateEnding'])
            dfcf['fiscalDateEnding'] = pd.to_datetime(dfcf['fiscalDateEnding'])
            dfis['fiscalDateEnding'] = pd.to_datetime(dfis['fiscalDateEnding'])
            df_closing_price['date'] = pd.to_datetime(df_closing_price['date'])

            # Create next quarter end date (fiscal quarter end + 3 months)
            dfe['nextQuarterEnd'] = dfe['fiscalDateEnding'] + pd.DateOffset(months=3)
            dfcf['nextQuarterEnd'] = dfcf['fiscalDateEnding'] + pd.DateOffset(months=3)
            dfis['nextQuarterEnd'] = dfis['fiscalDateEnding'] + pd.DateOffset(months=3)

            # Sort by date (merge_asof below requires both sides sorted on their keys)
            dfe = dfe.sort_values('nextQuarterEnd')
            dfcf = dfcf.sort_values('nextQuarterEnd')
            dfis = dfis.sort_values('nextQuarterEnd')
            df_closing_price = df_closing_price.sort_values('date')

            # Earnings + cash flow, keeping only quarters present in both statements
            merged_financials = pd.merge(
                dfe[['ticker', 'fiscalDateEnding', 'nextQuarterEnd', 'reportedEPS']],
                dfcf[['ticker', 'fiscalDateEnding', 'netIncome', 'paymentsForRepurchaseOfCommonStock', 'profitLoss']],
                on=['ticker', 'fiscalDateEnding'],
                how='inner'
            )

            # now merge the result with dfis
            merged_financials = pd.merge(
                merged_financials,
                dfis[['ticker', 'fiscalDateEnding', 'researchAndDevelopment', 'operatingExpenses', 'sellingGeneralAndAdministrative']],
                on=['ticker', 'fiscalDateEnding'],
                how='inner'
            )

            # Merge with closing price using merge_asof: first closing price dated on
            # or after nextQuarterEnd, at most 30 days later
            final_data = pd.merge_asof(
                merged_financials,
                df_closing_price,
                left_on='nextQuarterEnd',
                right_on='date',
                direction='forward',
                tolerance=pd.Timedelta("30 days")
            )

            # Drop quarters for which no closing price was found within the tolerance
            final_data = final_data.dropna(subset=['closingPrice']).reset_index(drop=True)

            # Define features (X) and target (y)
            features = ['reportedEPS', 'netIncome', 'paymentsForRepurchaseOfCommonStock', 
                        'profitLoss',
                        'researchAndDevelopment', 'operatingExpenses', 'sellingGeneralAndAdministrative']
            X = final_data[features]
            y = final_data['closingPrice']

            # Raw-price split. The X_* parts are re-derived just below with the same
            # seed; only y_train / y_temp / y_val / y_test are specific to this split.
            X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
            X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

            # Log-transform y; the model is trained on log1p(price)
            y_log = np.log1p(y)

            # Split data: 60% train, remaining 40% halved into validation and test
            X_train, X_temp, y_train_log, y_temp_log = train_test_split(X, y_log, test_size=0.4, random_state=42)
            X_val, X_test, y_val_log, y_test_log = train_test_split(X_temp, y_temp_log, test_size=0.5, random_state=42)

            # Scale features using statistics from the training rows only
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_val_scaled = scaler.transform(X_val)
            X_test_scaled = scaler.transform(X_test)

            # Grid search for SVR
            param_grid = {
                'C': [10, 100, 1000],
                'gamma': [0.01, 0.1, 1],
                'kernel': ['rbf']
            }
            svm = SVR()
            grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='neg_mean_squared_error')
            grid_search.fit(X_train_scaled, y_train_log)

            # Best model
            best_svm = grid_search.best_estimator_

            # Predict and evaluate on log scale
            y_val_pred_log = best_svm.predict(X_val_scaled)
            val_mse_log = mean_squared_error(y_val_log, y_val_pred_log)

            # Artifacts for this ticker are written under Models/<ticker>/
            model_dir = os.path.join('Models', t)
            os.makedirs(model_dir, exist_ok=True)

            # save model
            joblib.dump(best_svm, os.path.join(model_dir, 'best_svm_model.pkl'))

            # save scaler
            joblib.dump(scaler, os.path.join(model_dir, 'scaler.pkl'))

            # save input: the last row of X, repeated once per future quarter.
            # The same frame is reused for the prediction at the end of the iteration.
            latest_data = X.iloc[-1].to_frame().T
            future_quarters = 1
            future_X = pd.DataFrame([latest_data.values[0]] * future_quarters, columns=features)

            joblib.dump(future_X, os.path.join(model_dir, f'{t}_latest_features.pkl'))

            y_test_pred_log = best_svm.predict(X_test_scaled)
            test_mse_log = mean_squared_error(y_test_log, y_test_pred_log)

            # Convert back to original scale
            y_val_pred = np.expm1(y_val_pred_log)
            y_test_pred = np.expm1(y_test_pred_log)
            val_mse = mean_squared_error(np.expm1(y_val_log), y_val_pred)
            test_mse = mean_squared_error(np.expm1(y_test_log), y_test_pred)

            # Scale and predict for the next `future_quarters` quarter(s)
            future_X_scaled = scaler.transform(future_X)
            future_log_preds = best_svm.predict(future_X_scaled)
            future_cp_preds = np.expm1(future_log_preds)
        except Exception as e:
            # Deliberate best-effort: one bad ticker must not abort the whole run.
            # The failure is recorded and reported, not silently dropped.
            failed.append(t)
            print(f"Stock {t} failed. Error {e}")

print(failed)