In [None]:
import sys
import pymc3 as pm

print("Python Executable:", sys.executable)

def create_model():
    """Build a tiny three-variable model as an installation smoke test.

    Declares two standard-normal variables (``alpha``, ``beta``) and one
    half-normal variable (``sigma``) inside a fresh ``pm.Model``, then prints
    a confirmation line. Nothing is sampled and nothing is returned; the
    model object is discarded when the function exits.
    """
    smoke_model = pm.Model()
    with smoke_model:
        # Variables declared inside the model context attach to that model,
        # so the handles returned by the constructors are not kept.
        for variable_name in ('alpha', 'beta'):
            pm.Normal(variable_name, mu=0, sigma=1)
        pm.HalfNormal('sigma', sigma=1)
    print("Model created successfully.")

# Invoke the function to create the model
create_model()


In [2]:
import sys
import pymc as pm

print("Python Executable:", sys.executable)

def create_model():
    """Construct a minimal model to confirm that the PyMC install works.

    The model holds two ``Normal(0, 1)`` variables named ``alpha`` and
    ``beta`` and one ``HalfNormal(1)`` variable named ``sigma``. No sampling
    is performed. The function prints a confirmation message and returns
    ``None``.
    """
    check_model = pm.Model()
    with check_model:
        # Declaring a variable inside the context registers it on the model;
        # the returned handles are intentionally not bound to names.
        pm.Normal('alpha', mu=0, sigma=1)
        pm.Normal('beta', mu=0, sigma=1)
        pm.HalfNormal('sigma', sigma=1)
    print("Model created successfully.")

# Invoke the function to create the model
create_model()




Python Executable: C:\Users\saide\anaconda3\envs\pymc_env\python.exe
Model created successfully.


In [3]:
import numpy as np
print("NumPy Version:", np.__version__)


NumPy Version: 1.19.5


In [2]:
import sklearn
print(sklearn.__version__)

1.3.0


In [1]:
2 + 3

5

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import pymc as pm
import numpy as np
from joblib import Parallel, delayed
from sklearn.metrics import mean_squared_error  # Directly importing
from sklearn.model_selection import train_test_split  # Directly importing
import aesara.tensor as at  # Importing Aesara for mathematical operations

# Initialize logging
logging.basicConfig(filename='data_analysis.log', level=logging.INFO)

# Fixed seed so the train/test split and the MCMC draws repeat across runs.
RANDOM_SEED = 42
# Name of the response column in the CSV; it must never be used as a predictor.
TARGET_COLUMN = 'target'

try:
    logging.info('Loading data...')
    df = pd.read_csv('etf_data/preprocessed_SPY_2016-01-01_to_2020-12-31.csv')

    logging.info('Starting EDA...')

    def plot_data(column):
        """Save a histogram of ``df[column]`` to ``<column>_hist.png``."""
        fig, ax = plt.subplots()
        sns.histplot(df[column], ax=ax)
        ax.set_title(f'Histogram for {column}')
        fig.savefig(f'{column}_hist.png')
        # Release the figure; one is created per column and none is shown.
        plt.close(fig)

    Parallel(n_jobs=4)(delayed(plot_data)(col) for col in df.columns)

    # Correlations are only defined for numeric columns; selecting them
    # explicitly avoids a TypeError on pandas versions that no longer drop
    # non-numeric columns silently.
    numeric_df = df.select_dtypes(include='number')
    correlation_matrix = numeric_df.corr()
    print(correlation_matrix)
    logging.info('Correlation matrix computed.')

    logging.info('Starting model development...')

    # Predictors are every numeric column except the response itself.
    # Including the target among the features would leak the answer.
    selected_features = [col for col in numeric_df.columns if col != TARGET_COLUMN]

    X_train, X_test, y_train, y_test = train_test_split(
        df[selected_features],
        df[TARGET_COLUMN],
        test_size=0.2,
        random_state=RANDOM_SEED,
    )

    # Plain float arrays for the tensor maths (DataFrames are not tensors).
    X_train_values = X_train.to_numpy(dtype=float)
    X_test_values = X_test.to_numpy(dtype=float)

    with pm.Model() as model:
        alpha = pm.Normal('alpha', mu=0, sigma=1)
        beta = pm.Normal('beta', mu=0, sigma=1, shape=len(selected_features))
        sigma = pm.HalfNormal('sigma', sigma=1)

        # pm.math.dot dispatches to whichever tensor backend this PyMC
        # build uses, so the model does not depend on a specific backend import.
        mu_value = alpha + pm.math.dot(X_train_values, beta)

        y_obs = pm.Normal('y_obs', mu=mu_value, sigma=sigma, observed=y_train.to_numpy())

        trace = pm.sample(
            5000,
            tune=2000,
            target_accept=0.9,
            cores=4,
            random_seed=RANDOM_SEED,
            return_inferencedata=True,
        )

    logging.info('Model trained.')

    # Score on the held-out rows. The observed node is bound to y_train, so
    # posterior-predictive draws from it have the training length and cannot
    # be compared with y_test. Because the mean is linear in (alpha, beta),
    # the posterior-mean prediction for new rows is alpha_mean + X @ beta_mean.
    posterior = trace.posterior
    alpha_mean = float(posterior['alpha'].mean())
    beta_mean = posterior['beta'].mean(dim=('chain', 'draw')).values
    y_pred = alpha_mean + X_test_values @ beta_mean

    mse = mean_squared_error(y_test, y_pred)
    print('Mean Squared Error:', mse)
    logging.info(f'Mean Squared Error: {mse}')

except Exception as e:
    # Record the full traceback in the log file, then re-raise so the failure
    # is visible in the notebook instead of the cell finishing silently.
    logging.exception(f'An error occurred: {e}')
    raise
