In [279]:
!python3 -m pip install pandas scikit-learn


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m


In [280]:
import pandas as pd

In [281]:
# Fixed seed for the row shuffle so that "Restart Kernel -> Run All"
# reproduces the same row order (and therefore the same downstream fit).
SHUFFLE_SEED = 0

# Load the parameter table and the r-value table (both tab-separated).
df = pd.read_csv("linear_regression_params.tsv", sep="\t")
r_values = pd.read_csv("linear_regression_r_vals.tsv", sep="\t")

# Put the two tables side by side (joined on the row index), then shuffle
# all rows deterministically.
df = df.join(r_values).sample(frac=1, random_state=SHUFFLE_SEED)

df

Unnamed: 0,Income,Willingness to own a pet,Number of people in household,Number of toddlers,Floor area,Time avaliable,Other pet expenses,Ferret r estimate,Rabbit r estimate,Guinea pig r estimate,Dog r estimate
4,61979,0,8,7,138,2,1750,-2147483648,-2147483648,-2147483648,-2147483648
24,108813,1,7,2,188,0,5250,-32,-15,7,-24
2,86250,1,3,0,315,2,5250,20,23,45,16
16,38296,0,5,0,461,3,3500,-2147483648,-2147483648,-2147483648,-2147483648
27,16623,0,5,3,249,2,5250,-2147483648,-2147483648,-2147483648,-2147483648
19,120818,1,3,1,233,1,0,-12,10,30,1
25,57313,1,7,3,309,2,0,16,38,56,27
23,48275,1,4,2,212,2,3500,-26,-11,11,-21
22,65172,0,7,6,452,2,0,-2147483648,-2147483648,-2147483648,-2147483648
20,93287,1,6,5,317,0,1750,-25,-16,6,-23


In [282]:
# Per-pet constants consumed by the feature transformations.
# All three lists are parallel: position i belongs to the pet whose
# "r estimate" column sits at position 7 + i of the loaded table, i.e.
# Ferret, Rabbit, Guinea pig, Dog in the order shown in the data preview.

# Subtracted from "Floor area" to form T5.
min_areas = [
    5,   # ferret
    6,   # rabbit
    1,   # guinea pig
    20,  # dog
]

# Subtracted from "Time avaliable" to form T6.
min_times = [
    3,  # ferret
    1,  # rabbit
    1,  # guinea pig
    1,  # dog
]

# Subtracted from 10% of "Income" to form T1
# (presumably Australian dollars, going by the name -- confirm).
costs_aud = [
    1500,  # ferret
    700,   # rabbit
    800,   # guinea pig
    2800,  # dog
]

In [283]:
def transform_dataset(i, data=None, costs=None, areas=None, times=None) -> pd.DataFrame:
  """Build the regression table (features T1..T6 plus target R) for pet ``i``.

  Columns of the result:
    T1: 10% of "Income" minus the pet's cost minus "Other pet expenses".
    T2: "Willingness to own a pet" minus 1.
    T3: negated "Number of toddlers".
    T4: "Number of people in household" minus "Number of toddlers".
    T5: "Floor area" minus the pet's minimum area.
    T6: "Time avaliable" minus the pet's minimum time.
    R:  the column at position 7 + i of the source table (the pet's
        "r estimate" column in the table layout used by this notebook).

  Args:
    i: Zero-based pet index into the per-pet lists.
    data: Source table; defaults to the notebook-level ``df``.
    costs: Per-pet costs; defaults to the notebook-level ``costs_aud``.
    areas: Per-pet minimum areas; defaults to the notebook-level ``min_areas``.
    times: Per-pet minimum times; defaults to the notebook-level ``min_times``.

  Returns:
    A new DataFrame that shares the row index of the source table.

  Raises:
    IndexError: If ``i`` is negative or not covered by every per-pet list.
  """
  # Fall back to the notebook globals only when no explicit value is given,
  # so existing calls of the form transform_dataset(i) behave as before.
  source = df if data is None else data
  pet_costs = costs_aud if costs is None else costs
  pet_areas = min_areas if areas is None else areas
  pet_times = min_times if times is None else times

  # Negative indices would be accepted by list/iloc indexing and silently
  # pick the wrong column as the target, so reject them explicitly.
  pet_count = min(len(pet_costs), len(pet_areas), len(pet_times))
  if not 0 <= i < pet_count:
    raise IndexError(f"pet index {i} is outside the range 0..{pet_count - 1}")

  # Position of the first "r estimate" column in the source table.
  first_r_column = 7
  toddlers = source["Number of toddlers"]

  return pd.DataFrame({
    "T1": source["Income"] * 0.1 - pet_costs[i] - source["Other pet expenses"],
    "T2": source["Willingness to own a pet"] - 1,
    "T3": -toddlers,
    "T4": source["Number of people in household"] - toddlers,
    "T5": source["Floor area"] - pet_areas[i],
    # The column name's spelling ("avaliable") matches the data file header.
    "T6": source["Time avaliable"] - pet_times[i],
    "R": source.iloc[:, first_r_column + i],
  })

# Preview the transformed table for pet index 0 (the first "r estimate" column).
transform_dataset(0)

Unnamed: 0,T1,T2,T3,T4,T5,T6,R
4,2947.9,-1,-7,1,133,-1,-2147483648
24,4131.3,0,-2,5,183,-3,-32
2,1875.0,0,0,3,310,-1,20
16,-1170.4,-1,0,5,456,0,-2147483648
27,-5087.7,-1,-3,2,244,-1,-2147483648
19,10581.8,0,-1,2,228,-2,-12
25,4231.3,0,-3,4,304,-1,16
23,-172.5,0,-2,2,207,-1,-26
22,5017.2,-1,-6,1,447,-1,-2147483648
20,6078.7,0,-5,1,312,-3,-25


In [284]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [285]:
# Fixed seed for the train/hold-out split. Without it every run holds out
# different rows and prints different coefficients.
SPLIT_SEED = 0

# NOTE(review): the R column holds -2147483648 for the rows whose willingness
# is 0 (see the data preview). Those rows are fitted as-is, which is what
# drives the a2 coefficient to roughly 2147483648 -- confirm this is intended.

# Header for the table below; each following row is emitted as a LaTeX
# table row ("v1 & v2 & ... \\").
print("p, a1, a2, a3, a4, a5, a6, b")
for i in range(4):  # one fit per pet index 0..3
  dataset = transform_dataset(i)

  # Keep 90% of the rows for fitting. The held-out 10% is not evaluated in
  # this cell, so it is discarded here.
  train_data, _held_out = train_test_split(
    dataset, test_size=0.1, random_state=SPLIT_SEED
  )

  # Features are T1..T6; the target is the R column.
  reg = LinearRegression().fit(train_data.drop(columns="R"), train_data["R"])

  # Row layout: label (i + 2), the six coefficients a1..a6, then the intercept b.
  row = [i + 2, *reg.coef_, reg.intercept_]
  print(" & ".join(str(value) for value in row), end=" \\\\\n")

p, a1, a2, a3, a4, a5, a6, b
2 & 0.0016712217072667932 & 2147483635.1689243 & 1.5346691829261947 & 1.8288602274962744 & 0.008877878289497054 & 10.383853927181429 & -1.9522998332977295 \\
3 & 0.0011782574736476008 & 2147483655.1595526 & -1.4021082778073592 & 1.524341113288749 & 0.07498320007793945 & 6.478050954353814 & -31.07266330718994 \\
4 & 0.0011132784850400223 & 2147483676.862434 & -1.6047571843832646 & 3.517914663540725 & 0.03370284428013502 & 9.307391127080066 & -3.385354995727539 \\
5 & 0.0013604107815131485 & 2147483646.823722 & -0.7776510324566942 & 2.415721885984468 & 0.029251678581615127 & 8.646331874215226 & -24.159748792648315 \\


Output:
```
p, a1, a2, a3, a4, a5, a6, b
2 & 0.002084189759064954 & 2147483635.2952528 & 1.7495598940082078 & 1.4504797097401232 & 0.00033069967929300477 & 9.878615735906964 & 1.551311731338501 \\
3 & 0.0008007393719957749 & 2147483653.007195 & -0.3768722076353776 & 4.058774350343964 & 0.04027300210301911 & 10.01104525180109 & -26.52982532978058 \\
4 & 0.0010406408980429198 & 2147483676.4044113 & -0.47073526893445033 & 2.7569376659085965 & 0.03620340345676056 & 8.982237990887667 & -0.12039613723754883 \\
5 & 0.0007198881788241762 & 2147483646.5731955 & -0.050486441771252874 & 2.882273889993125 & 0.03145083563520378 & 7.690185246391873 & -24.836780428886414 \\
```