In [1]:
# подготовка данных и обучение модели
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


In [2]:
# Sample data: one row per user with the treatment flag, two numeric
# features, two categorical features and the observed outcome.
sample_columns = ['user_id', 'treatment', 'num_trips', 'avg_trip_cost',
                  'gender', 'location', 'target']
sample_rows = [
    (1, 1, 10, 15, 'M', 'City A', 5),
    (2, 0, 8, 12, 'F', 'City B', 2),
    (3, 1, 12, 14, 'M', 'City A', 7),
    (4, 0, 7, 11, 'F', 'City B', 1),
    (5, 1, 9, 13, 'M', 'City A', 4),
    (6, 0, 6, 10, 'F', 'City B', 0),
]
data = pd.DataFrame(sample_rows, columns=sample_columns)


In [3]:
# Display the sample frame.
data

Unnamed: 0,user_id,treatment,num_trips,avg_trip_cost,gender,location,target
0,1,1,10,15,M,City A,5
1,2,0,8,12,F,City B,2
2,3,1,12,14,M,City A,7
3,4,0,7,11,F,City B,1
4,5,1,9,13,M,City A,4
5,6,0,6,10,F,City B,0


In [4]:

# Transformed target: keep the outcome for treated users (treatment == 1)
# and flip its sign for everyone else.
# Computed column-wise with Series.where instead of a row-by-row apply; the
# resulting values are the same.
# NOTE(review): the textbook transformed-outcome target also rescales by the
# treatment probability; a plain sign flip matches it only up to a constant
# factor when treatment and control are equally likely — confirm that this is
# the intended design.
is_treated = data['treatment'] == 1
data['target_transformed'] = data['target'].where(is_treated, -data['target'])

data

Unnamed: 0,user_id,treatment,num_trips,avg_trip_cost,gender,location,target,target_transformed
0,1,1,10,15,M,City A,5,5
1,2,0,8,12,F,City B,2,-2
2,3,1,12,14,M,City A,7,7
3,4,0,7,11,F,City B,1,-1
4,5,1,9,13,M,City A,4,4
5,6,0,6,10,F,City B,0,0


In [5]:

# Model inputs: every column except the identifier and the two outcome
# columns; the regression target is the sign-flipped outcome.
non_feature_columns = ['target', 'target_transformed', 'user_id']
X = data.drop(columns=non_feature_columns)
y = data['target_transformed']

# Train/test split with test_size=0.2 and a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Column groups that the preprocessing step handles separately.
categorical_features = ['gender', 'location']
numeric_features = ['num_trips', 'avg_trip_cost']

In [6]:
# Preprocessing used to inspect the encoded feature matrix: standardise the
# numeric columns and one-hot encode the categorical ones.
# sparse_threshold=0 makes the transformer always return a dense array, so the
# result can be wrapped in a DataFrame regardless of how many mostly-zero
# one-hot columns the encoder produces.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ],
    sparse_threshold=0)

# Fitted on the full feature frame; y is passed through but the scaler and
# encoder do not use it.
data_transformed = preprocessor.fit_transform(X, y)

In [8]:
# Label the transformed matrix with the transformer's output feature names.
feature_names = preprocessor.get_feature_names_out()
data_transformed = pd.DataFrame(data_transformed, columns=feature_names)


In [9]:
# Display the scaled / one-hot encoded feature frame.
data_transformed

Unnamed: 0,num__num_trips,num__avg_trip_cost,cat__gender_F,cat__gender_M,cat__location_City A,cat__location_City B
0,0.676123,1.46385,0.0,1.0,1.0,0.0
1,-0.338062,-0.29277,1.0,0.0,0.0,1.0
2,1.690309,0.87831,0.0,1.0,1.0,0.0
3,-0.845154,-0.87831,1.0,0.0,0.0,1.0
4,0.169031,0.29277,0.0,1.0,1.0,0.0
5,-1.352247,-1.46385,1.0,0.0,0.0,1.0


In [10]:

# Preprocessing step for the pipeline: standardise the numeric columns and
# one-hot encode the categorical ones. It is fitted together with the
# pipeline, i.e. on the training split only.
# handle_unknown='ignore': a category that is absent from the training split
# is encoded as all zeros at predict time instead of raising an error.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Model: preprocessing followed by a random forest regressor with a fixed seed.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

# Fit the whole pipeline on the training split.
model.fit(X_train, y_train)


In [13]:
# Display the held-out feature rows.
X_test

Unnamed: 0,treatment,num_trips,avg_trip_cost,gender,location
0,1,10,15,M,City A
1,0,8,12,F,City B


In [11]:

# Model evaluation: predict on the held-out split and compute the mean
# squared error against the transformed target.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error:", mse)
print("Predictions:", y_pred)


Mean Squared Error: 1.6202
Predictions: [ 4.98 -0.2 ]


In [14]:
# Cut-off on the predicted value used for the discount decision (tunable).
threshold = 1.0

# Score every row of X with the fitted pipeline, not only the held-out split,
# so that a discount decision can be made for each user.
all_data_predictions = model.predict(X)

In [15]:
# Attach the model's prediction for each user to the frame as a new column.
data["predicted_uplift"] = pd.Series(all_data_predictions, index=data.index)
data

Unnamed: 0,user_id,treatment,num_trips,avg_trip_cost,gender,location,target,target_transformed,predicted_uplift
0,1,1,10,15,M,City A,5,5,4.98
1,2,0,8,12,F,City B,2,-2,-0.2
2,3,1,12,14,M,City A,7,7,5.64
3,4,0,7,11,F,City B,1,-1,-0.44
4,5,1,9,13,M,City A,4,4,4.24
5,6,0,6,10,F,City B,0,0,0.02


In [16]:
# Flag the users whose predicted uplift is strictly above the threshold.
data["give_discount"] = data["predicted_uplift"].gt(threshold)
data

Unnamed: 0,user_id,treatment,num_trips,avg_trip_cost,gender,location,target,target_transformed,predicted_uplift,give_discount
0,1,1,10,15,M,City A,5,5,4.98,True
1,2,0,8,12,F,City B,2,-2,-0.2,False
2,3,1,12,14,M,City A,7,7,5.64,True
3,4,0,7,11,F,City B,1,-1,-0.44,False
4,5,1,9,13,M,City A,4,4,4.24,True
5,6,0,6,10,F,City B,0,0,0.02,False


In [17]:
# Report the users flagged for a discount together with their predicted uplift.
users_to_give_discount = data.loc[data["give_discount"]]
report_columns = ["user_id", "predicted_uplift"]

print("Users to give discount:")
print(users_to_give_discount[report_columns])

Users to give discount:
   user_id  predicted_uplift
0        1              4.98
2        3              5.64
4        5              4.24
