In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr
import sqlite3

# Load the whole 'data' table into a DataFrame, then release the database
# handle straight away. try/finally guarantees the connection is closed even
# if the query raises (sqlite3's `with conn:` only scopes a transaction; it
# does not close the connection).
query = "SELECT * FROM data"
conn = sqlite3.connect('database.db')
try:
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Show the raw frame as loaded (pandas truncates long frames when printing)
print(df)

# Parse 'dteday' as datetime and make it the index. Reassigning instead of
# using inplace=True keeps the step explicit about which object it produces.
df['dteday'] = pd.to_datetime(df['dteday'])
df = df.set_index('dteday')

# Spearman's rank correlation coefficient between 'temp' and 'cnt';
# the second return value (the p-value) is discarded.
corr, _ = spearmanr(df['temp'], df['cnt'])
print('Spearmans correlation: %.3f' % corr)

       instant               dteday      temp   atemp   hum  windspeed  \
0            1  2011-01-01 00:00:00  0.224490  0.2879  0.81   0.000000   
1            2  2011-01-01 00:00:00  0.204082  0.2727  0.80   0.000000   
2            3  2011-01-01 00:00:00  0.204082  0.2727  0.80   0.000000   
3            4  2011-01-01 00:00:00  0.224490  0.2879  0.75   0.000000   
4            5  2011-01-01 00:00:00  0.224490  0.2879  0.75   0.000000   
...        ...                  ...       ...     ...   ...        ...   
17374    17375  2012-12-31 00:00:00  0.244898  0.2576  0.60   0.193018   
17375    17376  2012-12-31 00:00:00  0.244898  0.2576  0.60   0.193018   
17376    17377  2012-12-31 00:00:00  0.244898  0.2576  0.60   0.193018   
17377    17378  2012-12-31 00:00:00  0.244898  0.2727  0.56   0.157870   
17378    17379  2012-12-31 00:00:00  0.244898  0.2727  0.65   0.157870   

       casual  registered  cnt  season_1  ...  weekday_3  weekday_4  \
0           3          13   16         1

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
import sqlite3
import statsmodels.api as sm

# Load the whole 'data' table into a DataFrame, then release the database
# handle straight away. try/finally guarantees the connection is closed even
# if the query raises, and avoids holding it open while the model is fitted.
query = "SELECT * FROM data"
conn = sqlite3.connect('database.db')
try:
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Parse 'dteday' as datetime and make it the index (reassign rather than
# mutate in place).
df['dteday'] = pd.to_datetime(df['dteday'])
df = df.set_index('dteday')

# Collapse the four indicator columns 'season_1'..'season_4' into a single
# numeric 'season' column: idxmax picks the column name holding the row
# maximum, and the mapping turns that name into 1..4.
# NOTE(review): assumes exactly one indicator is set per row -- idxmax returns
# the first column ('season_1') for a row where all four are equal; confirm.
season_mapping = {'season_1': 1, 'season_2': 2, 'season_3': 3, 'season_4': 4}
season_columns = list(season_mapping)
df['season'] = df[season_columns].idxmax(axis=1).map(season_mapping)

# Interaction term: temperature multiplied by the numeric season code
df['temp_season'] = df['temp'] * df['season']

# Ordinary least squares of 'cnt' on temp, season and their interaction.
# NOTE(review): 'season' enters as a single numeric regressor (1..4), so the
# model treats it as an ordered, evenly spaced quantity -- confirm intended.
X = df[['temp', 'season', 'temp_season']]
X = sm.add_constant(X)  # add the intercept column
y = df['cnt']
model = sm.OLS(y, X)
results = model.fit()

# Print the summary of the regression model
print(results.summary())


                            OLS Regression Results                            
Dep. Variable:                    cnt   R-squared:                       0.167
Model:                            OLS   Adj. R-squared:                  0.167
Method:                 Least Squares   F-statistic:                     1164.
Date:                Wed, 14 Jun 2023   Prob (F-statistic):               0.00
Time:                        19:34:22   Log-Likelihood:            -1.1345e+05
No. Observations:               17379   AIC:                         2.269e+05
Df Residuals:                   17375   BIC:                         2.269e+05
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const         -29.6135      7.617     -3.888      

In [4]:
import statsmodels.api as sm

# Load the whole 'data' table into a DataFrame, then release the database
# handle straight away. try/finally guarantees the connection is closed even
# if the query raises, and avoids holding it open while the model is fitted.
# `sqlite3` and `pd` are not imported in this cell; they must already be in
# the kernel namespace from an earlier cell.
query = "SELECT * FROM data"
conn = sqlite3.connect('database.db')
try:
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Parse 'dteday' as datetime and make it the index (reassign rather than
# mutate in place).
df['dteday'] = pd.to_datetime(df['dteday'])
df = df.set_index('dteday')

# Collapse the four indicator columns 'weathersit_1'..'weathersit_4' into a
# single numeric 'weather' column: idxmax picks the column name holding the
# row maximum, and the mapping turns that name into 1..4.
# NOTE(review): assumes exactly one indicator is set per row -- idxmax returns
# the first column ('weathersit_1') for a row where all four are equal; confirm.
weather_mapping = {'weathersit_1': 1, 'weathersit_2': 2, 'weathersit_3': 3, 'weathersit_4': 4}
weather_columns = list(weather_mapping)
df['weather'] = df[weather_columns].idxmax(axis=1).map(weather_mapping)

# Ordinary least squares of 'cnt' on 'temp', 'hum', 'windspeed' and 'weather'.
# NOTE(review): 'weather' enters as a single numeric regressor (1..4), so the
# model treats it as an ordered, evenly spaced quantity -- confirm intended.
X = df[['temp', 'hum', 'windspeed', 'weather']]
X = sm.add_constant(X)  # add the intercept column
y = df['cnt']
model = sm.OLS(y, X)
results = model.fit()

# Print the summary of the regression model
print(results.summary())


                            OLS Regression Results                            
Dep. Variable:                    cnt   R-squared:                       0.252
Model:                            OLS   Adj. R-squared:                  0.252
Method:                 Least Squares   F-statistic:                     1462.
Date:                Wed, 14 Jun 2023   Prob (F-statistic):               0.00
Time:                        11:55:14   Log-Likelihood:            -1.1252e+05
No. Observations:               17379   AIC:                         2.251e+05
Df Residuals:                   17374   BIC:                         2.251e+05
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        180.3369      6.178     29.188      0.0