In [1]:
import pandas as pd

### Data Collection

In [2]:
import requests

# Remote locations of the exercise datasets (training and test splits).
train_data_url = (
    'https://www.raphaelcousin.com/modules/module3/exercise/'
    'module3_exercise_train.csv'
)
test_data_url = (
    'https://www.raphaelcousin.com/modules/module3/exercise/'
    'module3_exercise_test.csv'
)

# Function to download a file
def download_file(url, file_name, timeout=30):
    """Download ``url`` and save the response body to ``file_name``.

    Args:
        url: Address of the file to fetch.
        file_name: Local path the downloaded bytes are written to. The file
            is opened in binary write mode, so existing content is replaced.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on an unresponsive
            host, which would hang the notebook cell.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Fail loudly instead of saving an error page
    with open(file_name, 'wb') as file:
        file.write(response.content)
    print(f'Downloaded {file_name} from {url}')

# Fetch both dataset splits into the working directory.
download_file(url=train_data_url, file_name='module3_exercise_train.csv')
download_file(url=test_data_url, file_name='module3_exercise_test.csv')

ModuleNotFoundError: No module named 'requests'

In [None]:
# Load the training split, using the 'id' column as the row index.
df_train = pd.read_csv(
    "module3_exercise_train.csv",
    sep=",",
    index_col='id',
)

### Data Preprocessing

In [None]:
# Spelled-out garage capacities and the integer each one stands for.
text_to_int = {
    'one': 1,
    'two': 2,
    'three': 3,
}

def minimal_data_prep(data_df):
    """Return a prepared copy of ``data_df``; the input frame is left untouched.

    Steps:
      1. ``GarageCars`` entries found in ``text_to_int`` are replaced by their
         integer value. Entries that are already numeric (or numeric strings)
         are kept, so passing an already-prepared frame through again does
         not wipe the column. Anything else becomes NaN.
      2. Missing values in numeric columns are filled with that column's mean.
         Non-numeric columns are not filled.

    Args:
        data_df: DataFrame containing a ``GarageCars`` column.

    Returns:
        A new DataFrame with the transformations above applied.
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    prepared = data_df.copy()

    garage_cars = prepared['GarageCars']
    from_text = garage_cars.map(text_to_int)
    # Fallback for values that are already numbers: without it, a second pass
    # over prepared data would map every value to NaN.
    already_numeric = pd.to_numeric(garage_cars, errors='coerce')
    prepared['GarageCars'] = from_text.fillna(already_numeric)

    # numeric_only=True: DataFrame.mean() raises TypeError on non-numeric
    # columns in pandas >= 2.0.
    column_means = prepared.mean(numeric_only=True)
    return prepared.fillna(column_means)

# Replace df_train with its prepared version (GarageCars mapped to integers,
# missing values filled with column means).
# NOTE: df_train is rebound to the function's output, so re-running this cell
# sends the already-prepared frame through minimal_data_prep a second time.
df_train = minimal_data_prep(df_train)
df_train.head()

### Model Building and Evaluation

In [1]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

In [None]:
# Separate the target column from the features.
y_train = df_train['SalePrice']
X_train = df_train.drop(columns='SalePrice')
X_train.head()

In [None]:
# Fix the seed so the fitted forest, and every score derived from it, is the
# same on each Restart & Run All.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

In [None]:
def pred_eval(model, X_data, y_target):
    """Predict on ``X_data``, print the mean absolute error, return predictions.

    Args:
        model: Object exposing a ``predict`` method.
        X_data: Features passed to ``model.predict``.
        y_target: True target values the predictions are compared against.

    Returns:
        The value returned by ``model.predict(X_data)``.
    """
    y_pred = model.predict(X_data)
    # scikit-learn metrics take the ground truth first, then the predictions.
    mae = mean_absolute_error(y_target, y_pred)
    print("Mean Absolute Error :",mae)
    return y_pred

In [None]:
# Evaluate on the training data. The model was fitted on these same rows, so
# this error is not a held-out estimate.
y_pred = pred_eval(model=model, X_data=X_train, y_target=y_train)

### Generating Submission File

In [None]:
# Load the test split (indexed by 'id') and run it through the same
# preparation step as the training data.
X_test = pd.read_csv(
    "module3_exercise_test.csv", sep=",", index_col='id'
).pipe(minimal_data_prep)
X_test.head()

In [None]:
# Predict on the test set. This is prediction only, not evaluation: no test
# targets are passed in.
y_pred_test = model.predict(X_test)

In [None]:
# Build the submission from the test-set predictions. The placeholder value 0
# discarded y_pred_test; its rows are in the same order as X_test.
submission = pd.DataFrame({
    'id': X_test.index,
    'SalePrice': y_pred_test,
})

submission.to_csv('submission.csv', index=False, sep=',')
submission.head()