In [1]:
# Import necessary libraries
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Define the emails and labels
emails = [
    "Win a free iPhone now! Click the link below.",  # Spam
    "Congratulations! You have won a lottery. Claim your prize now!",  # Spam
    "Dear John, your invoice for last month is attached.",  # Not Spam
    "Meeting at 10 AM tomorrow. Please confirm your attendance.",  # Not Spam
    "Earn money quickly by joining this amazing opportunity!",  # Spam
    "Project update: The documents have been shared with the team.",  # Not Spam
    "Urgent: Verify your account information immediately.",  # Spam
    "Your package has been shipped and will arrive soon.",  # Not Spam
]

# Labels: 1 for spam, 0 for not spam
labels = [1, 1, 0, 0, 1, 0, 1, 0]

# Step 2: Convert text to features using CountVectorizer
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(emails)

# Step 3: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

# Step 4: Train the Naive Bayes model
model = MultinomialNB()
model.fit(X_train, y_train)

# Step 5: Test the model on the test data
y_pred = model.predict(X_test)

# Step 6: Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Step 7: Predict new emails
new_emails = [
    "Win a free iPhone by clicking this link!",  # Likely Spam
    "Hi John, please find the attached file for your review.",  # Not Spam
    "Hello, how are you doing today?",  # Not Spam
    "Limited time offer! Get 50% off all products.",  # Likely Spam
]
new_X = vectorizer.transform(new_emails)
predictions = model.predict(new_X)

# Output predictions
for email, prediction in zip(new_emails, predictions):
    label = "Spam" if prediction == 1 else "Not Spam"
    print(f"Email: {email}\nPrediction: {label}\n")

Accuracy: 0.5
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2

Email: Win a free iPhone by clicking this link!
Prediction: Spam

Email: Hi John, please find the attached file for your review.
Prediction: Not Spam

Email: Hello, how are you doing today?
Prediction: Spam

Email: Limited time offer! Get 50% off all products.
Prediction: Not Spam



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [3]:
import pandas as pd
import statsmodels.api as sm

def perform_ols_regression(file_path):
    # Load dataset from Excel
    df = pd.read_excel(file_path)
    
    # Extract independent and dependent variables
    X = df['House Size (sqft)']
    y = df['Price ($1000s)']
    
    # Add a constant for the intercept
    X = sm.add_constant(X)
    
    # Fit the OLS regression model
    model = sm.OLS(y, X).fit()
    
    # Print the regression summary
    print(model.summary())
    
    return model

# Example usage
file_path = "d:\\tong\\teaching\\fin7047\\2025fall\\excel\\house_prices.xlsx"  # Update with the correct path
ols_model = perform_ols_regression(file_path)

                            OLS Regression Results                            
Dep. Variable:         Price ($1000s)   R-squared:                       0.985
Model:                            OLS   Adj. R-squared:                  0.984
Method:                 Least Squares   F-statistic:                     1159.
Date:                Tue, 23 Sep 2025   Prob (F-statistic):           8.53e-18
Time:                        13:47:04   Log-Likelihood:                -68.087
No. Observations:                  20   AIC:                             140.2
Df Residuals:                      18   BIC:                             142.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                47.7919      6.29

In [4]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Define file path (update this with the correct path on your machine)
file_path = r"d:\tong\teaching\fin7047\2025fall\excel\house_prices.xlsx"

# Load dataset
df = pd.read_excel(file_path)

# Assume the last column is the dependent variable (Y) and others are independent variables (X)
X = df.iloc[:, :-1]  # Independent variables
y = df.iloc[:, -1]   # Dependent variable

# Split data into training (70%) and testing (30%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Add a constant for intercept (needed for OLS regression)
X_train = sm.add_constant(X_train)
X_test = sm.add_constant(X_test)

# Fit OLS model on training data
model = sm.OLS(y_train, X_train).fit()

# Predict on test data
y_pred = model.predict(X_test)

# Display regression summary (based on training set)
print(model.summary())

# Compute test set performance metrics
test_r2 = r2_score(y_test, y_pred)  # R² score
test_rmse = mean_squared_error(y_test, y_pred, squared=False)  # RMSE

                            OLS Regression Results                            
Dep. Variable:         Price ($1000s)   R-squared:                       0.983
Model:                            OLS   Adj. R-squared:                  0.981
Method:                 Least Squares   F-statistic:                     690.1
Date:                Tue, 23 Sep 2025   Prob (F-statistic):           5.66e-12
Time:                        13:48:32   Log-Likelihood:                -48.446
No. Observations:                  14   AIC:                             100.9
Df Residuals:                      12   BIC:                             102.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                50.0498      7.91

  res = hypotest_fun_out(*samples, **kwds)


In [6]:
import yfinance as yf
import pandas as pd
import statsmodels.api as sm

# Define the tickers and date range
btc_ticker = "BTC-USD"
sp500_ticker = "^GSPC"
start_date = "2020-01-01"
end_date = "2025-09-20"

# Download historical adjusted closing price data
btc_data = yf.download(btc_ticker, start=start_date, end=end_date, progress=False)["Close"]
sp500_data = yf.download(sp500_ticker, start=start_date, end=end_date, progress=False)["Close"]

# Combine into a single DataFrame with aligned dates
data = pd.concat([btc_data, sp500_data], axis=1)
data.columns = ["BTC", "S&P500"]

# Calculate daily returns
data = data.pct_change().dropna()

# Save the daily return dataset to a CSV file
data.to_csv("btc_sp500_daily_returns.csv")

# Run regression: BTC returns ~ S&P 500 returns
X = data["S&P500"]
X = sm.add_constant(X)  # Add intercept term
y = data["BTC"]

model = sm.OLS(y, X).fit()

# Display regression results
print(model.summary())

# Confirm saved dataset location
print("Daily returns dataset saved as 'btc_sp500_daily_returns.csv'.")


  btc_data = yf.download(btc_ticker, start=start_date, end=end_date, progress=False)["Close"]
  sp500_data = yf.download(sp500_ticker, start=start_date, end=end_date, progress=False)["Close"]
  data = data.pct_change().dropna()


                            OLS Regression Results                            
Dep. Variable:                    BTC   R-squared:                       0.123
Model:                            OLS   Adj. R-squared:                  0.122
Method:                 Least Squares   F-statistic:                     292.1
Date:                Tue, 23 Sep 2025   Prob (F-statistic):           2.18e-61
Time:                        13:54:10   Log-Likelihood:                 4337.8
No. Observations:                2087   AIC:                            -8672.
Df Residuals:                    2085   BIC:                            -8660.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0015      0.001      2.208      0.0