# Linear Regression Assignment
Denna notebook demonstrerar funktionaliteten hos klassen `LinjärRegression` och jämför dess resultat med en manuell beräkning.



In [1]:
import os

import numpy as np
import pandas as pd
from scipy import stats

from Statistiska_metoder_labb_klass import LinjärRegression



In [2]:
# Location of the data set. The default is the original author's local copy;
# set the SMALL_DIAMETER_FLOW_CSV environment variable to point at the file
# on another machine instead of editing this cell.
DATA_PATH = os.environ.get(
    "SMALL_DIAMETER_FLOW_CSV",
    "C:\\Programering\\It högskolan\\Statistiska-Metoder\\Data\\Small-diameter-flow.csv",
)

# NOTE(review): the correlation output further down lists an "Unnamed: 0"
# column, which looks like a saved index column in the CSV - confirm whether
# it should be dropped or read with index_col=0.
df = pd.read_csv(DATA_PATH)

In [3]:
# Response: the "Flow" column.
Y = df["Flow"]

# Design matrix: a leading column of ones followed by the predictor columns,
# in the order listed here.
predictor_names = ["Kinematic", "Geometric", "Inertial", "Observer"]
X = np.column_stack([np.ones(len(Y)), *(df[name] for name in predictor_names)])

In [4]:
# Preview the first five rows of the loaded data.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Flow,Kinematic,Geometric,Inertial,Observer
0,0,-17.047687,-0.584344,-6.896327,-14.32356,0.0
1,1,-17.043179,-0.584344,-6.896327,-14.32356,0.0
2,2,-17.043179,-0.584344,-6.896327,-14.32356,0.0
3,3,-11.182193,1.242506,-5.069476,-11.583284,0.0
4,4,-11.18443,1.242506,-5.069476,-11.583284,0.0


In [5]:
# Correlation of every column with the response column "Flow".
df.corr().loc[:, "Flow"]

Unnamed: 0    0.140553
Flow          1.000000
Kinematic     0.863403
Geometric     0.995794
Inertial      0.903310
Observer      0.181490
Name: Flow, dtype: float64

In [6]:
# Create the LinjärRegression model from the design matrix X and the response Y.
model = LinjärRegression(X, Y)

In [7]:
# Manual least-squares computation, done step by step so the figures can be
# compared with the values reported by the model object.

# Coefficients via the pseudo-inverse of X'X.
gram_pinv = np.linalg.pinv(X.T @ X)
b = gram_pinv @ X.T @ Y

n = len(Y)          # number of observations
d = b.shape[0] - 1  # number of predictors (one coefficient is not counted)

# Sums of squares.
residuals = Y - X @ b
SSE = np.sum(np.square(residuals))   # sum of squared errors
centered = Y - np.mean(Y)
SST = np.sum(np.square(centered))    # total sum of squares
SSR = SST - SSE                      # sum of squares due to regression

R2 = 1 - SSE / SST                   # coefficient of determination
var = SSE / (n - d - 1)              # variance estimate, SSE over n - d - 1
S = np.sqrt(var)                     # standard deviation

# Print every quantity as "<label>: <value>", in a fixed order.
manual_report = [
    ("Koefficienter", b),
    ("Antal observationer", n),
    ("Antal prediktorer", d),
    ("Sum of Squared Errors (SSE)", SSE),
    ("Total Sum of Squares (SST)", SST),
    ("Sum of Squares due to Regression (SSR)", SSR),
    ("R²", R2),
    ("Varians", var),
    ("Standardavvikelse", S),
]
for label, value in manual_report:
    print(f"{label}: {value}")

Koefficienter: [-2.58376909  0.87000771  3.60315069 -0.75188958  0.0168805 ]
Antal observationer: 198
Antal prediktorer: 4
Sum of Squared Errors (SSE): 1.2105524599028277
Total Sum of Squares (SST): 425.14419302220585
Sum of Squares due to Regression (SSR): 423.933640562303
R²: 0.9971526073276518
Varians: 0.00627229253835662
Standardavvikelse: 0.07919780639864099


In [8]:
# Print the results reported by the model object.
# Each entry pairs a label with a getter; the getter is called right before
# its line is printed, so values are fetched one at a time in this order.
model_report = [
    ("Antal observationer", lambda: model.n),
    ("Varians", model.varians),
    ("Standardavvikelse", model.standardavvikelse),
    ("Signifikans av regressionen", model.signifikans),
    ("R²-värde", model.r_kvadrat),
]
for label, fetch in model_report:
    print(f"{label}: {fetch()}")


Antal observationer: 198
Varians: 0.00627229253835662
Standardavvikelse: 0.07919780639864099
Signifikans av regressionen: 1.72651823461757e-139
R²-värde: 0.9971526073291292
4
[-2.58376909  0.87000771  3.60315069 -0.75188958  0.0168805 ]
