# Lựa chọn feature


In [1]:
import warnings

import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold, RepeatedStratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import LinearSVC

warnings.filterwarnings("ignore")

# Number of folds passed as n_splits to the cross-validation splitters below.
k_fold = 10
# Seed passed as random_state to the splitters and estimators below.
seed = 42
# Also seed NumPy's legacy global random generator.
np.random.seed(seed)

## 1. Load data


In [2]:
# Load the match table and keep only the target ("result") together with the
# candidate features examined in this notebook.
matches_df = pd.read_csv(
    "../../../raw_data/clean_and_transformation/manchester_city.csv"
)[
    [
        "result",
        "venue",
        "win_before_team1",
        "win_before_team2",
        "draw_before_team1",
        "draw_before_team2",
        "opponent",
        "history_team1_win_team2",
        "history_team1_draw_team2",
        "history_team1_lose_team2",
        "is_opponent_big6",
    ]
]

# Settings shared by the plain and the stratified K-fold splitters.
cv_info = dict(n_splits=k_fold, shuffle=True, random_state=seed)

kfold_cv = KFold(**cv_info)
stratify_cv = StratifiedKFold(**cv_info)
repeated_stratify_cv = RepeatedStratifiedKFold(
    n_splits=k_fold,
    n_repeats=3,
    random_state=seed,
)

# Splitter used by the later cells that pass cv=applied_cv.
applied_cv = repeated_stratify_cv

In [3]:
# Display (n_rows, n_columns) of the selected table.
matches_df.shape

(836, 11)

In [4]:
# Count the rows for each value of the target column "result".
matches_df["result"].value_counts()

result
W    456
L    219
D    161
Name: count, dtype: int64

## 2. Dùng mô hình để lựa chọn feature


In [5]:
# Separate the target column from the predictors.
y = matches_df["result"]
X = matches_df.drop(columns="result")

# Group the predictor columns by dtype: numeric vs. string/categorical.
categorical_cols = X.select_dtypes(include=["object", "category"]).columns
numerical_cols = X.select_dtypes(include=["int64", "float64"]).columns

numerical_cols, categorical_cols

(Index(['win_before_team1', 'win_before_team2', 'draw_before_team1',
        'draw_before_team2', 'history_team1_win_team2',
        'history_team1_draw_team2', 'history_team1_lose_team2',
        'is_opponent_big6'],
       dtype='object'),
 Index(['venue', 'opponent'], dtype='object'))

In [6]:
# Tạo ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
    ]
)

### Định nghĩa Pipelines


In [7]:
# Định nghĩa các mô hình cơ bản
logit = LogisticRegression(random_state=seed)

rf = RandomForestClassifier(
    n_estimators=100,
    random_state=seed,
)

svc = LinearSVC(
    C=0.05,
    penalty="l1",
    dual=False,
    random_state=seed,
)

# Tạo các pipelines kết hợp preprocessor và mô hình
pipe_rf_without_fs = Pipeline([("preprocessor", preprocessor), ("classifier", logit)])

pipe_svc_without_fs = Pipeline([("preprocessor", preprocessor), ("classifier", logit)])

pipe_rf = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("feature_selection", SelectFromModel(rf)),
        ("classifier", logit),
    ]
)

pipe_svc = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("feature_selection", SelectFromModel(svc)),
        ("classifier", logit),
    ]
)

### Huấn luyện và đánh giá các Pipelines


In [8]:
# Huấn luyện và đánh giá pipeline sử dụng RandomForest
scores_rf_without_fs = cross_val_score(
    pipe_rf_without_fs,
    X,
    y,
    cv=KFold(
        n_splits=k_fold,
        shuffle=True,
        random_state=seed,
    ),
    scoring="accuracy",
    error_score="raise",
)

scores_rf = cross_val_score(
    pipe_rf,
    X,
    y,
    cv=KFold(
        n_splits=k_fold,
        shuffle=True,
        random_state=seed,
    ),
    scoring="accuracy",
    error_score="raise",
)

# Huấn luyện và đánh giá pipeline sử dụng SVC
scores_svc_without_fs = cross_val_score(
    pipe_svc_without_fs,
    X,
    y,
    cv=KFold(
        n_splits=k_fold,
        shuffle=True,
        random_state=seed,
    ),
    scoring="accuracy",
    error_score="raise",
)

scores_svc = cross_val_score(
    pipe_svc,
    X,
    y,
    cv=KFold(
        n_splits=k_fold,
        shuffle=True,
        random_state=seed,
    ),
    scoring="accuracy",
    error_score="raise",
)

In [9]:
# Report the mean accuracy of each pipeline from the K-fold runs above.
for label, fold_scores in (
    (
        "Độ chính xác trung bình của RandomForest pipeline không sử dụng feature selection:",
        scores_rf_without_fs,
    ),
    (
        "Độ chính xác trung bình của RandomForest pipeline có sử dụng feature selection:",
        scores_rf,
    ),
    (
        "Độ chính xác trung bình của SVC pipeline không sử dụng feature selection:",
        scores_svc_without_fs,
    ),
    (
        "Độ chính xác trung bình của SVC pipeline có sử dụng feature selection:",
        scores_svc,
    ),
):
    print(label, fold_scores.mean())

Độ chính xác trung bình của RandomForest pipeline không sử dụng feature selection: 0.5681153184165233
Độ chính xác trung bình của RandomForest pipeline có sử dụng feature selection: 0.5705966724039013
Độ chính xác trung bình của SVC pipeline không sử dụng feature selection: 0.5681153184165233
Độ chính xác trung bình của SVC pipeline có sử dụng feature selection: 0.5646156052782559


### Thử nghiệm khi dùng cách chia fold khác (như RepeatedStratifiedKFold)

In [10]:
# Huấn luyện và đánh giá pipeline sử dụng RandomForest
scores_rf_without_fs = cross_val_score(
    pipe_rf_without_fs,
    X,
    y,
    cv=applied_cv,
    scoring="accuracy",
    error_score="raise",
)

scores_rf = cross_val_score(
    pipe_rf,
    X,
    y,
    cv=applied_cv,
    scoring="accuracy",
    error_score="raise",
)

# Huấn luyện và đánh giá pipeline sử dụng SVC
scores_svc_without_fs = cross_val_score(
    pipe_svc_without_fs,
    X,
    y,
    cv=applied_cv,
    scoring="accuracy",
    error_score="raise",
)

scores_svc = cross_val_score(
    pipe_svc,
    X,
    y,
    cv=applied_cv,
    scoring="accuracy",
    error_score="raise",
)

print(
    "Độ chính xác trung bình của RandomForest pipeline không sử dụng feature selection:",
    scores_rf_without_fs.mean(),
)
print(
    "Độ chính xác trung bình của RandomForest pipeline có sử dụng feature selection:",
    scores_rf.mean(),
)
print(
    "Độ chính xác trung bình của SVC pipeline không sử dụng feature selection:",
    scores_svc_without_fs.mean(),
)
print(
    "Độ chính xác trung bình của SVC pipeline có sử dụng feature selection:",
    scores_svc.mean(),
)

Độ chính xác trung bình của RandomForest pipeline không sử dụng feature selection: 0.5717823675655002
Độ chính xác trung bình của RandomForest pipeline có sử dụng feature selection: 0.5689567794989482
Độ chính xác trung bình của SVC pipeline không sử dụng feature selection: 0.5717823675655002
Độ chính xác trung bình của SVC pipeline có sử dụng feature selection: 0.5637789252247085


In [11]:
def clean_feature_names(feature_names):
    """Strip the transformer prefix from feature names and de-duplicate them.

    Names are expected in the ``<transformer>__<feature>`` form. Only the first
    ``__`` is treated as the separator, so a feature part that itself contains
    ``__`` is kept whole.

    Rules, applied per name:
      * no ``__`` separator: the name is kept unchanged;
      * prefix ``num``: the feature part is kept unchanged;
      * any other prefix: a feature part containing ``date_time``,
        ``formation_team1`` or ``formation_team2`` (checked in that order) is
        collapsed to that column name; otherwise the feature part is kept
        unchanged (e.g. ``opponent_Arsenal`` stays ``opponent_Arsenal``).

    Args:
        feature_names: Iterable of feature-name strings.

    Returns:
        List of unique cleaned names, in order of first appearance.
    """
    collapsed_columns = ("date_time", "formation_team1", "formation_team2")

    # A dict is used as an insertion-ordered set so the result is deterministic.
    cleaned_names = {}
    for name in feature_names:
        prefix, separator, base_name = name.partition("__")
        if not separator:
            # No transformer prefix: keep the name as it is.
            cleaned_names[name] = None
            continue

        if prefix != "num":
            # Collapse per-category columns of these features to the column name.
            for column in collapsed_columns:
                if column in base_name:
                    base_name = column
                    break

        cleaned_names[base_name] = None

    return list(cleaned_names)

In [12]:
def find_best_list_features(estimator, n_splits=k_fold):
    """Fit ``estimator`` on each K-fold split and record the features it selects.

    The notebook-level ``X`` and ``y`` are split with a shuffled ``KFold``
    seeded with ``seed``. For every fold the estimator is fitted on the training
    part, scored on the held-out part, and the names selected by its
    ``"feature_selection"`` step are collected. Per-fold accuracy, the mean
    accuracy and the features of the best-scoring fold are printed.

    Args:
        estimator: Pipeline with a ``"preprocessor"`` step providing
            ``get_feature_names_out()`` and a ``"feature_selection"`` step
            providing ``get_support()``. It is fitted in place on every fold.
        n_splits: Number of folds.

    Returns:
        One dict per fold with the keys ``"feature_names"`` (all names produced
        by the preprocessor in that fold) and ``"best_features"`` (cleaned names
        of the features selected in that fold).
    """
    kf = KFold(
        n_splits=n_splits,
        shuffle=True,
        random_state=seed,
    )

    # Start below any possible score so the first fold always becomes the best.
    best_accuracy = float("-inf")
    best_features = None

    results = []
    accuracy_mean = 0

    for loop_idx, (train_index, test_index) in enumerate(kf.split(X), start=1):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        estimator.fit(X_train, y_train)
        accuracy = estimator.score(X_test, y_test)
        accuracy_mean += accuracy

        print(f"Accuracy at loop {loop_idx}: {accuracy}")

        transformed_feature_names = estimator.named_steps[
            "preprocessor"
        ].get_feature_names_out()
        support_mask = estimator.named_steps["feature_selection"].get_support()

        # Resolve the mask against the names of this very fold: the encoded
        # columns may differ between folds, so a mask must never be applied to
        # the feature names of another fold.
        selected_features = clean_feature_names(
            transformed_feature_names[support_mask]
        )

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_features = selected_features

        results.append(
            {
                "feature_names": transformed_feature_names,
                "best_features": selected_features,
            }
        )

    accuracy_mean /= n_splits
    print(f"Accuracy mean: {accuracy_mean}")
    print(
        f"Best features with max accuracy: \n{best_features}"
    )

    return results

In [13]:
# Run the fold-by-fold selection with the RandomForest-based pipeline and keep
# the per-fold results for the later evaluation cells.
rf_list_best_features = find_best_list_features(pipe_rf)

Accuracy at loop 1: 0.5714285714285714
Accuracy at loop 2: 0.6071428571428571
Accuracy at loop 3: 0.47619047619047616
Accuracy at loop 4: 0.5238095238095238
Accuracy at loop 5: 0.6428571428571429
Accuracy at loop 6: 0.5833333333333334
Accuracy at loop 7: 0.5783132530120482
Accuracy at loop 8: 0.6144578313253012
Accuracy at loop 9: 0.5903614457831325
Accuracy at loop 10: 0.5180722891566265
Accuracy mean: 0.5705966724039013
Best features with max accuracy: 
['win_before_team1', 'history_team1_draw_team2', 'is_opponent_big6', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_before_team1', 'history_team1_lose_team2', 'venue_Home', 'draw_before_team2']


In [14]:
# Print the feature list selected in each fold, one list per line.
for fold_result in rf_list_best_features:
    print(fold_result["best_features"])

['win_before_team1', 'history_team1_draw_team2', 'is_opponent_big6', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_before_team1', 'history_team1_lose_team2', 'venue_Home', 'draw_before_team2']
['win_before_team1', 'history_team1_draw_team2', 'is_opponent_big6', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_before_team1', 'history_team1_lose_team2', 'venue_Home', 'draw_before_team2']
['win_before_team1', 'history_team1_draw_team2', 'is_opponent_big6', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_before_team1', 'history_team1_lose_team2', 'venue_Home', 'draw_before_team2']
['win_before_team1', 'history_team1_draw_team2', 'is_opponent_big6', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_before_team1', 'history_team1_lose_team2', 'venue_Home', 'draw_before_team2']
['win_before_team1', 'history_team1_draw_team2', 'is_opponent_big6', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_before_t

In [15]:
# Run the fold-by-fold selection with the SVC-based pipeline and keep the
# per-fold results for the later evaluation cells.
svc_list_best_features = find_best_list_features(pipe_svc)

Accuracy at loop 1: 0.5714285714285714
Accuracy at loop 2: 0.6071428571428571
Accuracy at loop 3: 0.4642857142857143
Accuracy at loop 4: 0.5119047619047619
Accuracy at loop 5: 0.6428571428571429
Accuracy at loop 6: 0.5714285714285714
Accuracy at loop 7: 0.5783132530120482
Accuracy at loop 8: 0.6144578313253012
Accuracy at loop 9: 0.5903614457831325
Accuracy at loop 10: 0.4939759036144578
Accuracy mean: 0.5646156052782558
Best features with max accuracy: 
['win_before_team1', 'is_opponent_big6', 'opponent_Liverpool', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_before_team1', 'history_team1_lose_team2', 'venue_Home', 'draw_before_team2']


In [16]:
# Print the feature list selected in each fold, one list per line.
for fold_result in svc_list_best_features:
    print(fold_result["best_features"])

['win_before_team1', 'history_team1_draw_team2', 'is_opponent_big6', 'opponent_Liverpool', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_before_team1', 'history_team1_lose_team2', 'venue_Home', 'draw_before_team2']
['win_before_team1', 'is_opponent_big6', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_before_team1', 'history_team1_lose_team2', 'venue_Home', 'draw_before_team2']
['win_before_team1', 'history_team1_draw_team2', 'is_opponent_big6', 'opponent_Liverpool', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_before_team1', 'history_team1_lose_team2', 'venue_Home', 'draw_before_team2']
['win_before_team1', 'is_opponent_big6', 'opponent_Liverpool', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_before_team1', 'history_team1_lose_team2', 'venue_Home', 'draw_before_team2']
['win_before_team1', 'is_opponent_big6', 'opponent_Liverpool', 'history_team1_win_team2', 'win_before_team2', 'venue_Away', 'draw_befo

In [17]:
def show_list_features_removed(list_features):
    """Print the columns of ``X`` that are not represented in ``list_features``.

    ``list_features`` holds post-preprocessing names, where a one-hot encoded
    column appears as ``<column>_<category>`` (e.g. ``venue_Home``). A
    string/categorical column of ``X`` therefore counts as removed only when
    neither its own name nor any ``<column>_...`` name is in the list; every
    other column is matched by its exact name.

    Args:
        list_features: Iterable of selected feature names.
    """
    selected = list(list_features)
    selected_lookup = set(selected)
    # Columns that the notebook one-hot encodes (string / categorical dtypes).
    one_hot_columns = set(X.select_dtypes(include=["object", "category"]).columns)

    def is_represented(column):
        if column in selected_lookup:
            return True
        if column not in one_hot_columns:
            return False
        prefix = f"{column}_"
        return any(name.startswith(prefix) for name in selected)

    features_not_in_list = [
        column for column in X.columns if not is_represented(column)
    ]

    # Print the features that are missing from the list.
    print("Features bị loại bỏ:", features_not_in_list)

### Thử nghiệm lại với các cột đã chọn


In [18]:
# Fit the preprocessor on the whole table and densify its output.
transformed_X = preprocessor.fit_transform(X).toarray()

# Column labels: numeric names first, then the one-hot names of the
# categorical columns, matching the transformer order in the preprocessor.
new_columns = [
    *numerical_cols,
    *preprocessor.named_transformers_["cat"].get_feature_names_out(categorical_cols),
]

transformed_X = pd.DataFrame(data=transformed_X, columns=new_columns)
transformed_X

Unnamed: 0,win_before_team1,win_before_team2,draw_before_team1,draw_before_team2,history_team1_win_team2,history_team1_draw_team2,history_team1_lose_team2,is_opponent_big6,venue_Away,venue_Home,...,opponent_Southampton,opponent_Stoke City,opponent_Sunderland,opponent_Swansea City,opponent_Tottenham Hotspur,opponent_Watford,opponent_West Bromwich Albion,opponent_West Ham United,opponent_Wigan Athletic,opponent_Wolverhampton Wanderers
0,-1.875927,-1.382641,-0.956166,-1.151581,-1.504622,-1.109603,-1.359845,-0.597614,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-1.875927,2.380832,-0.956166,-1.151581,-1.504622,-1.109603,-1.359845,-0.597614,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.115403,0.499095,-0.956166,-1.151581,-1.504622,-1.109603,-1.359845,-0.597614,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-0.702244,2.380832,-0.956166,-1.151581,-1.504622,-1.109603,-1.359845,-0.597614,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.115403,0.499095,-0.956166,0.007394,-1.504622,-1.109603,-1.359845,1.673320,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
831,1.645122,-1.382641,-0.956166,-0.224401,0.174841,-0.106457,0.098695,-0.597614,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
832,1.645122,-0.629947,-0.956166,0.702779,0.213898,0.045182,-0.053943,-0.597614,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
833,1.645122,-0.629947,-0.956166,-0.224401,-0.129806,-0.596365,0.785565,1.673320,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
834,1.645122,0.875443,-0.956166,-1.151581,1.854303,-1.109603,-0.995210,-0.597614,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Tạo ra một hàm để đánh giá accuracy của mô hình trên danh sách các đặc trưng đã chọn

In [19]:
def evaluate_selected_features(model_name, model_list_best_features, evaluate_func):
    """Score every per-fold feature list and report the best-scoring one(s).

    Each entry's ``"best_features"`` list is passed to ``evaluate_func``. The
    lists that reach the highest score are collected (the same set of features
    is reported once, regardless of the order in which it is listed), printed,
    and handed to ``show_list_features_removed``.

    Args:
        model_name: Label printed in the report.
        model_list_best_features: Sequence of dicts that each contain a
            ``"best_features"`` list.
        evaluate_func: Callable that takes a feature list and returns a
            comparable score (higher is better).

    Returns:
        None. The report is printed.
    """
    if not model_list_best_features:
        # Nothing to score, so there is no best entry to report.
        print(f"No feature lists to evaluate for: {model_name}")
        return None

    max_score = None
    loop_index = 0
    list_best_features = []
    seen_feature_sets = set()

    for index, fold_result in enumerate(model_list_best_features):
        candidate = fold_result["best_features"]
        score = evaluate_func(candidate)
        # Order-insensitive key: identical features in another order count once.
        key = frozenset(candidate)

        if max_score is None or score > max_score:
            # New best score: restart the collection from this candidate.
            max_score = score
            loop_index = index
            list_best_features = [candidate]
            seen_feature_sets = {key}
        elif score == max_score and key not in seen_feature_sets:
            seen_feature_sets.add(key)
            list_best_features.append(candidate)

    print(f"Accuracy max: {max_score} at loop: {loop_index}")

    print(f"List best features of: {model_name}")
    print(list_best_features)

    for best_features in list_best_features:
        show_list_features_removed(best_features)

    print("\nRaw features: ")
    print(model_list_best_features[loop_index]["best_features"])
    return None

### 1. Random Forest

In [20]:
# Counter of evaluate_rf calls; it is only used to label the printed output.
idx = 0
def evaluate_rf(cleaned_features) -> float:
    """Cross-validate a logistic regression restricted to ``cleaned_features``.

    The columns are taken from the notebook-level ``transformed_X`` and the
    model is scored against ``y`` with the ``applied_cv`` splitter. The call
    number, the per-split accuracies and their mean are printed.

    Args:
        cleaned_features: Column names of ``transformed_X`` to keep.

    Returns:
        Mean accuracy over all splits.
    """
    global idx
    X_new = transformed_X[cleaned_features]

    # Fresh classifier per call; a local name avoids shadowing the notebook-level `logit`.
    classifier = LogisticRegression(random_state=seed)
    scores_rf = cross_val_score(
        classifier,
        X_new,
        y,
        cv=applied_cv,
        scoring="accuracy",
        error_score="raise",
    )

    print("Loop:", idx)
    idx += 1
    print(scores_rf)
    print("Score mean:", scores_rf.mean())
    print("==========================================================================")
    return scores_rf.mean()

In [21]:
# Score every feature list collected for the RandomForest-based selection and
# print the best-scoring one(s).
evaluate_selected_features("Random forest", rf_list_best_features, evaluate_rf)

Loop: 0
[0.63095238 0.57142857 0.54761905 0.54761905 0.5952381  0.55952381
 0.55421687 0.55421687 0.53012048 0.56626506 0.5952381  0.58333333
 0.58333333 0.53571429 0.54761905 0.58333333 0.56626506 0.57831325
 0.55421687 0.59036145 0.53571429 0.58333333 0.61904762 0.60714286
 0.57142857 0.5        0.57831325 0.56626506 0.55421687 0.57831325]
Score mean: 0.5689567794989482
Loop: 1
[0.63095238 0.57142857 0.54761905 0.54761905 0.5952381  0.55952381
 0.55421687 0.55421687 0.53012048 0.56626506 0.5952381  0.58333333
 0.58333333 0.53571429 0.54761905 0.58333333 0.56626506 0.57831325
 0.55421687 0.59036145 0.53571429 0.58333333 0.61904762 0.60714286
 0.57142857 0.5        0.57831325 0.56626506 0.55421687 0.57831325]
Score mean: 0.5689567794989482
Loop: 2
[0.63095238 0.57142857 0.54761905 0.54761905 0.5952381  0.55952381
 0.55421687 0.55421687 0.53012048 0.56626506 0.5952381  0.58333333
 0.58333333 0.53571429 0.54761905 0.58333333 0.56626506 0.57831325
 0.55421687 0.59036145 0.53571429 0.58333

### 2. SVC

In [22]:
# Counter of evaluate_svc calls; it is only used to label the printed output.
idx = 0
def evaluate_svc(cleaned_features) -> float:
    """Cross-validate a logistic regression restricted to ``cleaned_features``.

    The columns are taken from the notebook-level ``transformed_X`` and the
    model is scored against ``y`` with the ``applied_cv`` splitter. The call
    number, the per-split accuracies and their mean are printed.

    Args:
        cleaned_features: Column names of ``transformed_X`` to keep.

    Returns:
        Mean accuracy over all splits.
    """
    global idx
    X_new = transformed_X[cleaned_features]

    # Fresh classifier per call; a local name avoids shadowing the notebook-level `logit`.
    classifier = LogisticRegression(random_state=seed)
    scores_svc = cross_val_score(
        classifier,
        X_new,
        y,
        cv=applied_cv,
        scoring="accuracy",
        error_score="raise",
    )

    print("Loop:", idx)
    idx += 1
    print(scores_svc)
    print("Score mean:", scores_svc.mean())
    print("==========================================================================")
    return scores_svc.mean()

In [23]:
# Score every feature list collected for the SVC-based selection and print the
# best-scoring one(s).
evaluate_selected_features("SVC", svc_list_best_features, evaluate_svc)

Loop: 0
[0.60714286 0.55952381 0.55952381 0.54761905 0.60714286 0.55952381
 0.55421687 0.57831325 0.53012048 0.56626506 0.5952381  0.55952381
 0.5952381  0.52380952 0.54761905 0.58333333 0.54216867 0.57831325
 0.54216867 0.62650602 0.53571429 0.58333333 0.63095238 0.60714286
 0.57142857 0.5        0.56626506 0.54216867 0.56626506 0.56626506]
Score mean: 0.5677615222795946
Loop: 1
[0.61904762 0.60714286 0.55952381 0.54761905 0.5952381  0.57142857
 0.55421687 0.53012048 0.53012048 0.56626506 0.60714286 0.57142857
 0.5952381  0.54761905 0.54761905 0.57142857 0.57831325 0.59036145
 0.56626506 0.57831325 0.55952381 0.58333333 0.61904762 0.60714286
 0.5952381  0.5        0.57831325 0.56626506 0.55421687 0.60240964]
Score mean: 0.5733314209217825
Loop: 2
[0.60714286 0.55952381 0.55952381 0.54761905 0.60714286 0.55952381
 0.55421687 0.57831325 0.53012048 0.56626506 0.5952381  0.55952381
 0.5952381  0.52380952 0.54761905 0.58333333 0.54216867 0.57831325
 0.54216867 0.62650602 0.53571429 0.58333

### Kết luận
Vậy sau bước lựa chọn feature:
- Random Forest: danh sách đặc trưng được chọn:
    + win_before_team1,
    + win_before_team2,
    + draw_before_team2,
    + venue_Away,
    + draw_before_team1,
    + history_team1_lose_team2,
    + is_opponent_big6,
    + history_team1_win_team2,
    + history_team1_draw_team2,
    + venue_Home
    
- SVC: danh sách đặc trưng được chọn:
    + win_before_team1
    + win_before_team2
    + draw_before_team2
    + venue_Away
    + venue_Home
    + draw_before_team1
    + history_team1_lose_team2
    + history_team1_win_team2
    + is_opponent_big6

In [26]:
# RF sẽ bỏ opponent
data_rf = matches_df[
    [
        "result",
        "win_before_team1",
        "win_before_team2",
        "draw_before_team1",
        "draw_before_team2",
        "history_team1_lose_team2",
        "history_team1_win_team2",
        "history_team1_draw_team2",
        # "venue_Away",
        # "venue_Home",
        "is_opponent_big6",
    ]
]

data_rf["venue_Home"] = matches_df["venue"].apply(lambda x: 1 if x == "Home" else 0)
data_rf["venue_Away"] = matches_df["venue"].apply(lambda x: 1 if x == "Away" else 0)

data_rf.to_csv("../../train_model/MC/data_rf.csv", index=False)

In [27]:
# Data set for the SVC selection: the target, the numeric predictors listed
# below (`history_team1_draw_team2` is not among them) and the two venue
# indicators built further down. The `opponent` column is dropped.
data_svc = matches_df[
    [
        "result",
        "win_before_team1",
        "win_before_team2",
        "draw_before_team1",
        "draw_before_team2",
        "history_team1_lose_team2",
        "history_team1_win_team2",
        "is_opponent_big6",
    ]
].copy()  # independent frame, so the new columns are not written onto a slice of matches_df

# Only the one-hot `venue` columns are kept from the categorical features, so
# they are rebuilt here as 0/1 indicators instead of carrying over the raw
# `venue` and `opponent` columns.
data_svc["venue_Home"] = (matches_df["venue"] == "Home").astype("int64")
data_svc["venue_Away"] = (matches_df["venue"] == "Away").astype("int64")
data_svc.to_csv("../../train_model/MC/data_svc.csv", index=False)