In [None]:
import pandas as pd
import sqlite3

# Load Excel data into a pandas DataFrame
data = pd.read_excel('data/Europe.xlsx')

# Query that pulls the regression columns back out of the SQLite table
query = '''
SELECT "Computer Sales", "GNP per Head", "Unemployment Rate", "%age spend on education"
FROM europe
'''

# Create a SQLite database, write the DataFrame to a table and read it back.
# if_exists='replace' keeps the cell re-runnable: with the default ('fail')
# to_sql raises ValueError as soon as the 'europe' table exists from an
# earlier run.
conn = sqlite3.connect('europe_data.db')
try:
    data.to_sql('europe', conn, if_exists='replace', index=False)
    df = pd.read_sql_query(query, conn)
finally:
    # Release the database file even if the write or the query fails
    conn.close()

# Perform multiple regression
from sklearn.linear_model import LinearRegression

# Predictors and response are selected by the same column names used in the query
X = df[['GNP per Head', 'Unemployment Rate', '%age spend on education']]
y = df['Computer Sales']

model = LinearRegression()
model.fit(X, y)

print("Intercept:", model.intercept_)
print("Coefficients:", model.coef_)

# Make predictions for one hypothetical observation; the column names must
# match the ones the model was fitted on
new_data = pd.DataFrame({
    'GNP per Head': [50000], 
    'Unemployment Rate': [5.0],
    '%age spend on education': [6.0]
})

predicted_sales = model.predict(new_data)
print("Predicted computer sales:", predicted_sales)

In [None]:
import pandas as pd
import sqlite3

# Load the Excel file and sheet into pandas dataframe
data = pd.read_excel('data/Europe.xlsx', sheet_name='Regression2', usecols='B:E', skiprows=16, nrows=20)

# Simple SQL query to verify the data
query = """
SELECT * 
FROM regression_analysis
"""

# Create a SQLite3 database and table
conn = sqlite3.connect('Europe.db')
try:
    data.to_sql('regression_analysis', conn, if_exists='replace', index=False)

    # Assuming 'regression_analysis' table is now created with columns ['Intercept', 'GNP per head', '%age spend on education']
    # Display the table contents
    for row in conn.execute('SELECT * FROM regression_analysis'):
        print(row)

    df = pd.read_sql_query(query, conn)
finally:
    # Close the handle so Europe.db is not kept open for the life of the kernel
    conn.close()
print(df)

# Regression analysis equivalent in Python using statsmodels
import statsmodels.api as sm

# Assuming 'GNP per head' and '%age spend on education' are the significant variables
# NOTE(review): the response is taken from a column named 'Intercept' -- confirm
# against the sheet that this column really holds the dependent variable.
X = df[['GNP per head', '%age spend on education']]
y = df['Intercept']

# Adding a constant to the predictor variable set
X = sm.add_constant(X)

# Performing the regression and fitting the model
model = sm.OLS(y, X).fit()

# Printing the summary of the regression
print(model.summary())


In [None]:
import pandas as pd
import sqlite3

# Load the data from the Excel file
data = pd.read_excel('data/Autos.xlsx', sheet_name='Autos', usecols='C:L', skiprows=10, nrows=32)

# Prepare a query to select seasonality data
query = """
SELECT Sales, Q1, Q2, Q3, LagGNP, LagUnemp, LagInt FROM auto_sales
"""

# Create a connection to a new SQLite database
conn = sqlite3.connect('data/Autos.db')
try:
    data.to_sql('auto_sales', conn, if_exists='replace', index=False)

    # Display the first rows of the loaded table
    for row in conn.execute('SELECT * FROM auto_sales LIMIT 5'):
        print(row)

    # Execute the seasonality query
    seasonality_data = pd.read_sql_query(query, conn)
finally:
    # Close the handle so data/Autos.db is released even if a query fails
    conn.close()
print(seasonality_data.head())


In [None]:
import pandas as pd
import sqlite3
from sklearn.linear_model import LinearRegression

# Load Excel data into a pandas DataFrame  
data = pd.read_excel('Priceandads.xlsx', sheet_name='data', usecols='E:G', skiprows=3, nrows=166)

# Add columns for interaction and squared terms
data['Ad*P'] = data['Ad'] * data['Price']  
data['Price2'] = data['Price']**2
data['Ad2'] = data['Ad']**2

# Query the data from SQLite ("Ad*P" must be quoted because of the '*')
query = '''
SELECT Price, Ad, "Ad*P", Price2, Ad2, Sales  
FROM data;
'''

# Write the DataFrame to a SQLite database and read the model columns back.
# if_exists='replace' keeps the cell re-runnable: with the default ('fail')
# to_sql raises ValueError once the 'data' table exists from an earlier run.
conn = sqlite3.connect('Priceandads.db')
try:
    data.to_sql('data', conn, if_exists='replace', index=False)
    model_data = pd.read_sql(query, conn)
finally:
    # Release the database file even if the write or the query fails
    conn.close()

# Fit initial regression model on every term (all columns except the response)
X = model_data.drop('Sales', axis=1)  
y = model_data['Sales']
model = LinearRegression()
model.fit(X, y)

# Check p-values and remove insignificant terms
import statsmodels.api as sm
X2 = sm.add_constant(X)
est = sm.OLS(y, X2)
est2 = est.fit()
print(est2.summary())

# Re-fit model without Price2
X_final = model_data[['Price', 'Ad', 'Ad*P', 'Ad2']]
X2_final = sm.add_constant(X_final)  
est_final = sm.OLS(y, X2_final)
est2_final = est_final.fit()
print(est2_final.summary())

In [None]:
import pandas as pd
import sqlite3
import numpy as np

# Load data from Excel into pandas
# `usecols` only understands column letters ("F", "A:E", ...); a cell range
# such as 'F5:F8' raises "Invalid column name". The row window is selected
# with skiprows/nrows instead, and header=None stops the first cell of the
# window from being consumed as a column label.
# NOTE(review): assumes the three inputs sit in F5:F7 of the 'retention'
# sheet in the order ceiling, spend, rate -- confirm against the workbook.
retention_inputs = pd.read_excel('Retentiontemplate.xlsx', sheet_name='retention', usecols='F', skiprows=4, nrows=4, header=None)

# Extract parameters 
ceilingRet = retention_inputs.iloc[0,0]
currentretentionrate = retention_inputs.iloc[2,0]  
currentspendpercustomer = retention_inputs.iloc[1,0]

# Calculate kRet
kRet = -np.log(1 - currentretentionrate/ceilingRet) / currentspendpercustomer

# Load acquisition data (same layout, one row higher: F4:F7)
# NOTE(review): row offsets mirror the original 'F4:F7' range -- confirm.
acquisition_inputs = pd.read_excel('Retentiontemplate.xlsx', sheet_name='acquisition', usecols='F', skiprows=3, nrows=4, header=None)

# Extract parameters
ceilingAcq = acquisition_inputs.iloc[0,0]
currentacquisitionrate = acquisition_inputs.iloc[2,0]
currentspendperprospect = acquisition_inputs.iloc[1,0]

# Calculate kAcq  
kAcq = -np.log(1 - currentacquisitionrate/ceilingAcq) / currentspendperprospect

print(f"kRet: {kRet:.4f}")
print(f"kAcq: {kAcq:.4f}") 

# Store the retention parameters as a one-row table whose column names match
# the SELECT below. Writing the raw spreadsheet frame would produce a table
# without these columns and the query would fail with "no such column".
retention_params = pd.DataFrame([{
  "ceilingRet": ceilingRet,
  "currentspendpercustomer": currentspendpercustomer,
  "currentretentionrate": currentretentionrate,
}])

# Create SQLite database and table
con = sqlite3.connect("Retentiontemplate.db")
try:
  retention_params.to_sql("retention_data", con, if_exists="replace", index=False)

  # Query data 
  cur = con.cursor()
  cur.execute("""
  SELECT ceilingRet, currentspendpercustomer, currentretentionrate  
  FROM retention_data
""")

  retention_data = cur.fetchall()
finally:
  # Release the database file even if the write or the query fails
  con.close()
print(retention_data)

# Retention model built from the fitted parameters
def retention_rate(spend, ceilingRet, kRet):
  """Return the retention rate predicted for a given spend.

  Evaluates ceilingRet * (1 - exp(-kRet * spend)): the result is 0 at zero
  spend and approaches ceilingRet as spend grows (for positive kRet).

  Args:
    spend: spend level at which to evaluate the curve.
    ceilingRet: upper bound of the curve.
    kRet: exponential rate constant of the curve.

  Returns:
    The modelled retention rate (scalar or NumPy array, following the inputs).
  """
  shortfall = np.exp(-kRet*spend)
  return ceilingRet * (1 - shortfall)

# Evaluate the fitted retention curve at a spend of 60 and report it as a percentage
rate_at_60 = retention_rate(60, ceilingRet, kRet)
print(f"Retention rate with $60 spend: {rate_at_60:.2%}")

In [None]:
import sqlite3
import pandas as pd
import numpy as np

# Read in data from Excel (legacy-format workbook customerretentionoriginal.xls)
# The inputs below are read positionally up to column index 13, so the window
# must span 14 columns (D..Q). The previous 'D:M' window had only 10 columns
# and every lookup at index 11 or 13 raised IndexError.
# NOTE(review): assumes the ceilings and k values live in columns O and Q of
# the 'original' sheet -- confirm against the workbook.
data = pd.read_excel('customerretentionoriginal.xls', sheet_name='original', usecols='D:Q', skiprows=9, nrows=21) 

# Fail early with a clear message if the sheet is narrower than expected
if data.shape[1] < 14:
  raise ValueError(f"expected at least 14 columns (D:Q) in sheet 'original', got {data.shape[1]}")

# Extract key inputs
beginning_customers = data.iloc[1,1]
market_size = data.iloc[1,9] 
profit_per_customer = data.iloc[2,1]
kAcq = data.iloc[1,13]  
kRet = data.iloc[2,13]
ceilingAcq = data.iloc[1,11]
ceilingRet = data.iloc[2,11]

# Initialize variables: constant per-year spend levels and zero-filled result
# arrays, one slot per simulated year
years = 20
acquisition_spend = np.ones(years) * 1.0  # E11:E30
retention_spend = np.ones(years) * 8.0    # F11:F30
customers = np.zeros(years)
prospects = np.zeros(years)
profits = np.zeros(years)
marketing_costs = np.zeros(years)

# Open (or create) the results database and make sure the per-year table exists
create_table_sql = """CREATE TABLE IF NOT EXISTS data (
               year INTEGER PRIMARY KEY,
               beginning_customers REAL,
               acquisition_spend REAL,
               retention_spend REAL,
               prospects REAL,
               percentage_acquired REAL,
               fraction_retained REAL,  
               ending_customers REAL,
               ending_prospects REAL,
               profit REAL,
               marketing_cost REAL
               )"""

con = sqlite3.connect("customerretention.db")
cur = con.cursor()

# One row per simulated year; `year` is the primary key
cur.execute(create_table_sql)

# Per-year model: acquisition, retention, profit and marketing cost
def calculate_year(year, beginning_customers, acquisition_spend, retention_spend):
  """Compute one simulated year from the customer base and the spend levels.

  Reads the module-level inputs market_size, ceilingAcq, kAcq, ceilingRet,
  kRet and profit_per_customer.

  Args:
    year: index of the simulated year (not used in the calculation).
    beginning_customers: customers at the start of the year.
    acquisition_spend: spend per prospect.
    retention_spend: spend per existing customer.

  Returns:
    Tuple (prospects, percentage_acquired, fraction_retained,
    ending_customers, ending_prospects, profit, marketing_cost).
  """
  # Everyone in the market who is not yet a customer is a prospect
  prospects = market_size - beginning_customers

  # Saturating response curves: ceiling * (1 - exp(-k * spend))
  percentage_acquired = ceilingAcq * (1 - np.exp(-kAcq * acquisition_spend))
  fraction_retained = ceilingRet * (1 - np.exp(-kRet * retention_spend))

  # Year-end customer base = newly acquired + retained
  ending_customers = percentage_acquired * prospects + fraction_retained * beginning_customers
  ending_prospects = market_size - ending_customers

  acquisition_cost = acquisition_spend * prospects
  retention_cost = retention_spend * beginning_customers

  # Margin is earned on the average of the opening and closing customer base
  gross_margin = 0.5 * profit_per_customer * (beginning_customers + ending_customers)
  profit = gross_margin - acquisition_cost - retention_cost
  marketing_cost = acquisition_cost + retention_cost

  return (prospects, percentage_acquired, fraction_retained, ending_customers, ending_prospects, profit, marketing_cost)

# Run simulation for 20 years
customers[0] = beginning_customers

# Placeholders let sqlite3 bind the values itself; formatting floats into the
# SQL text breaks on values such as nan/inf and is injection-prone by design.
insert_row_sql = "INSERT INTO data VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"

try:
  # Start from an empty table: `year` is the primary key, so re-inserting
  # years 1..N on a second run would otherwise fail with a UNIQUE constraint
  # error.
  cur.execute("DELETE FROM data")

  for i in range(years):
    # Keep the opening and closing customer counts separate: customers[i] is
    # the opening balance of year i and must not be overwritten by the result.
    year_start_customers = customers[i]
    (prospects[i], pct_acquired, frac_retained, year_end_customers,
     ending_prospects, profits[i], marketing_costs[i]) = calculate_year(
        i, year_start_customers, acquisition_spend[i], retention_spend[i])

    # This year's closing balance is next year's opening balance
    if i < years-1:
      customers[i+1] = year_end_customers

    # Insert results into database (column order follows the table definition)
    cur.execute(insert_row_sql, (
      i+1,
      float(year_start_customers),
      float(acquisition_spend[i]),
      float(retention_spend[i]),
      float(prospects[i]),
      float(pct_acquired),
      float(frac_retained),
      float(year_end_customers),
      float(ending_prospects),
      float(profits[i]),
      float(marketing_costs[i]),
    ))

  con.commit()

  # Query data from SQLite
  query = pd.read_sql_query("SELECT * FROM data", con)
finally:
  # Close database connection, also when the simulation or a query fails
  con.close()

# Calculate NPV of profits
# np.npv was removed from NumPy (1.20); this reproduces its convention, in
# which the first cash flow is at t=0 and therefore undiscounted.
# NOTE(review): Excel's NPV() discounts the first value by one period --
# confirm which convention the workbook expects.
discount_rate = 0.10
NPV = float(np.sum(profits / (1 + discount_rate) ** np.arange(len(profits))))

print(query)  
print(f"NPV of 20-year profits: ${NPV:,.0f}")