In [None]:
import pandas as pd

### Data Collection

In [None]:
import requests

# Remote locations of the exercise datasets (both live under the same folder)
BASE_URL = 'https://www.raphaelcousin.com/modules/data-science-practice/module3/exercise'
train_data_url = f'{BASE_URL}/module3_exercise_train.csv'
test_data_url = f'{BASE_URL}/module3_exercise_test.csv'

# Function to download a file
def download_file(url, file_name, timeout=30):
    """Download ``url`` and save the response body to ``file_name``.

    Args:
        url: Address fetched with an HTTP GET request.
        file_name: Local path the response body is written to, in binary mode.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled connection.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of saving the error page to disk
    response.raise_for_status()
    with open(file_name, 'wb') as file:
        file.write(response.content)
    print(f'Downloaded {file_name} from {url}')

# Fetch both datasets into the working directory
for source_url, target_name in (
    (train_data_url, 'module3_exercise_train.csv'),
    (test_data_url, 'module3_exercise_test.csv'),
):
    download_file(source_url, target_name)

In [None]:
# Load the training data, using the 'id' column as the row index
df_train = pd.read_csv("module3_exercise_train.csv", index_col="id")

In [None]:
# Display the column labels of the training frame
df_train.columns

### Data Preprocessing

In [None]:
# Preview only: fillna returns a new frame and the result is not assigned,
# so df_train itself still contains its missing values after this cell.
df_train.fillna(-1)

In [None]:
# Display the dtype of every column
df_train.dtypes

In [None]:
# Lookup from number words to integers, used to decode the GarageCars column
mapping = {
    "zero": 0,
    "one": 1,
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
    "seven": 7,
    "eight": 8,
    "nine": 9,
    "ten": 10
}

# Convert only while the column is not numeric yet. Series.map turns every
# value that is not a key of `mapping` into NaN, so re-running this cell on the
# already-converted column would silently wipe it out.
if not pd.api.types.is_numeric_dtype(df_train["GarageCars"]):
    # Nullable "Int64" keeps missing entries as <NA> instead of forcing floats
    df_train["GarageCars"] = df_train["GarageCars"].map(mapping).astype("Int64")

In [None]:
# Display the dtypes again, after the GarageCars conversion in the previous cell
df_train.dtypes

### Model Building and Evaluation

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

In [None]:
# df_num = df_train.select_dtypes(include=['int64', 'float64'])

# Cast every column to float, then fill each missing value with its column mean
df_train = (
    df_train
    .astype(float)
    .pipe(lambda frame: frame.fillna(frame.mean()))
)

# Split into target (y) and features (X)
y_train = df_train["SalePrice"].to_numpy()
X_train = df_train.drop(columns="SalePrice").to_numpy()

# Train/test split (currently disabled)
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42
# )

In [None]:
# Display the feature matrix (the NumPy array produced by .to_numpy())
X_train

In [None]:
# Linear regression model, fitted on the full training set
# (X_train / y_train are NumPy arrays, so the model sees no column names)
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

In [None]:
# NOTE(review): this cell repeats the float cast, mean imputation and feature
# extraction already performed before the model was fitted; it looks redundant
# when the notebook is run top to bottom -- confirm before removing.
df_train = df_train.astype(float)
df_train = df_train.fillna(df_train.mean())

# Separate the features (X) from the target

X_train = df_train.drop("SalePrice", axis=1).to_numpy()

### Generating Submission File

In [None]:
# Load the test set and decode GarageCars with the same word-to-integer mapping
X_test = pd.read_csv("module3_exercise_test.csv", sep=",", index_col='id')
X_test["GarageCars"] = X_test["GarageCars"].map(mapping).astype("Int64")
X_test = X_test.astype(float)

# Keep exactly the training feature columns, in training order: the model was
# fitted on a bare NumPy array, so it matches features by position, not by name.
# A column missing from the test file raises a KeyError here.
feature_columns = df_train.columns.drop("SalePrice")
X_test = X_test[feature_columns]

# Impute with the training means so no test-set statistics are used
X_test = X_test.fillna(df_train[feature_columns].mean())

# Predict on a NumPy array, the same input type that was passed to fit();
# X_test stays a DataFrame so its 'id' index is available for the submission.
y_pred = lin_reg.predict(X_test.to_numpy())

In [None]:
# Print the predicted values for the test set
print(y_pred)

In [None]:
# Pair each test id with its predicted SalePrice
submission = (
    pd.Series(y_pred, index=X_test.index, name="SalePrice")
    .rename_axis("id")
    .reset_index()
)

# Write the two-column CSV (id, SalePrice) without the positional index
submission.to_csv("submission.csv", index=False)
submission.head()