In [47]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import classification_report, accuracy_score

# import CSv fi
data = pd.read_csv("trainingdata02.csv")

data.head()

Unnamed: 0,battle_id,turn,total_turn,rank,weather,field,condition,p1_side,p1a_form,p1a_hp,...,p2c_status,p2c_tera,p2d_form,p2d_hp,p2d_ability,p2d_item,p2d_move,p2d_status,p2d_tera,win
0,2099996083,0,5,1643.5,,Psychic Terrain:5,,,Smeargle,100,...,,unknown,unknown,100,unknown,unknown,unknown,,unknown,-1
1,2099996083,1,5,1643.5,,Psychic Terrain:4,Trick Room:4,,Smeargle,1,...,,unknown,unknown,100,unknown,unknown,unknown,,unknown,-1
2,2099996083,2,5,1643.5,SunnyDay:5,Psychic Terrain:3,Trick Room:3,,Torkoal,100,...,,unknown,unknown,100,unknown,unknown,unknown,,unknown,-1
3,2099996083,3,5,1643.5,SunnyDay:4,Psychic Terrain:2,Trick Room:2,,Torkoal,100,...,fnt,unknown,unknown,100,unknown,unknown,unknown,,unknown,-1
4,2099996083,4,5,1643.5,SunnyDay:3,Psychic Terrain:1,Trick Room:1,,Torkoal,55,...,fnt,unknown,Urshifu,0,unknown,unknown,"Wicked Blow:3,Detect:4",fnt,unknown,-1


In [48]:
# To filter specific columns.
columns_to_keep = [
    'battle_id', 'p1a_form', 'p1b_form', 'p1c_form', 'p1d_form',
    'p2a_form', 'p2b_form', 'p2c_form', 'p2d_form', 'win'
]
filtered_data = data[columns_to_keep]

# To delete rows where the 'win' column is 0.
#filtered_data = filtered_data[filtered_data['win'] != 0]

# 提取唯一的名字
columns_to_encode = [
    'p1a_form', 'p1b_form', 'p1c_form', 'p1d_form',
    'p2a_form', 'p2b_form', 'p2c_form', 'p2d_form'
]
unique_names = pd.unique(filtered_data[columns_to_encode].values.ravel('K'))

# 为每个名字分配一个唯一的数字
name_to_number = {name: idx for idx, name in enumerate(unique_names, start=1)}

# 使用字典对列进行编码
for col in columns_to_encode:
    filtered_data[col] = filtered_data[col].map(name_to_number)

# 手动执行聚合
# 初始化存储结果的列表
battle_ids = []
p1_forms = []
p2_forms = []
wins = []

# 遍历分组数据并聚合
for battle_id, group in filtered_data.groupby('battle_id'):
    battle_ids.append(battle_id)
    p1_forms.append(group[['p1a_form', 'p1b_form', 'p1c_form', 'p1d_form']].values.flatten().tolist())
    p2_forms.append(group[['p2a_form', 'p2b_form', 'p2c_form', 'p2d_form']].values.flatten().tolist())
    wins.append(group['win'].iloc[0])

# 创建一个包含聚合数据的新DataFrame
aggregated_data = pd.DataFrame({
    'battle_id': battle_ids,
    'p1_forms': p1_forms,
    'p2_forms': p2_forms,
    'win': wins
})

# 定义去重函数
def remove_duplicates(forms_list):
    return list(dict.fromkeys(forms_list))

# 去除p1_forms和p2_forms中的重复项
aggregated_data['p1_forms'] = aggregated_data['p1_forms'].apply(remove_duplicates)
aggregated_data['p2_forms'] = aggregated_data['p2_forms'].apply(remove_duplicates)

# 转换列表列为固定长度的向量，长度不足的用0填充
max_length = max(aggregated_data['p1_forms'].apply(len).max(), aggregated_data['p2_forms'].apply(len).max())

def pad_list(forms_list, length):
    return forms_list + [0] * (length - len(forms_list))

aggregated_data['p1_forms'] = aggregated_data['p1_forms'].apply(lambda x: pad_list(x, max_length))
aggregated_data['p2_forms'] = aggregated_data['p2_forms'].apply(lambda x: pad_list(x, max_length))

# 将特征列转换为向量格式
X = pd.concat([pd.DataFrame(aggregated_data['p1_forms'].tolist()), pd.DataFrame(aggregated_data['p2_forms'].tolist())], axis=1)
y = aggregated_data['win']

# 使用分层抽样拆分数据为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

X


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data[col] = filtered_data[col].map(name_to_number)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data[col] = filtered_data[col].map(name_to_number)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data[col] = filtered_data[col].map(name_to_number)
A value is trying to be s

ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.

In [None]:
aggregated_data

In [None]:
X

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
X_train

In [None]:
# Initialize the Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=500, max_depth=100, random_state=100)

# Train the classifier
rf_classifier.fit(X_train, y_train)

# Make predictions
y_pred = rf_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report for RandomForestClassifier:")
print(report)

In [None]:
et = ExtraTreesClassifier(n_estimators=500, max_depth=100)

et.fit(X_train, y_train)
y_pred = et.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(f'Classification Report for ExtraTreesClassifier:\n{report}')

In [None]:
# Extract unique names
columns_to_keep = [
    'p1a_form', 'p1b_form', 'p1c_form', 'p1d_form',
    'p2a_form', 'p2b_form', 'p2c_form', 'p2d_form'
]
unique_names = pd.unique(filtered_data[columns_to_encode].values.ravel('K'))

# Assign a unique number to each name
name_to_number = {name: idx for idx, name in enumerate(unique_names, start=1)}

# Encode the columns using the dictionary
for col in columns_to_encode:
    filtered_data[col] = filtered_data[col].map(name_to_number)

# Manually perform aggregation
# Initialize lists to store the results
battle_ids = []
p1_forms = []
p2_forms = []
wins = []

# Iterate over grouped data and aggregate
for battle_id, group in filtered_data.groupby('battle_id'):
    battle_ids.append(battle_id)
    p1_forms.append(group[['p1a_form', 'p1b_form', 'p1c_form', 'p1d_form']].values.flatten().tolist())
    p2_forms.append(group[['p2a_form', 'p2b_form', 'p2c_form', 'p2d_form']].values.flatten().tolist())
    wins.append(group['win'].iloc[0])

# Create a new DataFrame with the aggregated data
aggregated_data = pd.DataFrame({
    'battle_id': battle_ids,
    'p1_forms': p1_forms,
    'p2_forms': p2_forms,
    'win': wins
})

# Define a function to remove duplicates
def remove_duplicates(forms_list):
    return list(dict.fromkeys(forms_list))

# Remove duplicates from p1_forms and p2_forms
aggregated_data['p1_forms'] = aggregated_data['p1_forms'].apply(remove_duplicates)
aggregated_data['p2_forms'] = aggregated_data['p2_forms'].apply(remove_duplicates)

# Convert list columns to fixed-length vectors, padding with 0 if necessary
max_length = max(aggregated_data['p1_forms'].apply(len).max(), aggregated_data['p2_forms'].apply(len).max())

def pad_list(forms_list, length):
    return forms_list + [0] * (length - len(forms_list))

aggregated_data['p1_forms'] = aggregated_data['p1_forms'].apply(lambda x: pad_list(x, max_length))
aggregated_data['p2_forms'] = aggregated_data['p2_forms'].apply(lambda x: pad_list(x, max_length))

# Convert feature columns to vector format
X = pd.concat([pd.DataFrame(aggregated_data['p1_forms'].tolist()), pd.DataFrame(aggregated_data['p2_forms'].tolist())], axis=1)
y = aggregated_data['win']

# Split the data into training and testing sets using stratified sampling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


#total turn effact

In [None]:
# Filter specific columns
columns_to_keep = [
    'battle_id', 'total_turn', 'p1a_form', 'p1b_form', 'p1c_form', 'p1d_form',
    'p2a_form', 'p2b_form', 'p2c_form', 'p2d_form', 'win'
]
filtered_data = data[columns_to_keep]

# Delete rows where the 'win' column is 0
filtered_data = filtered_data[filtered_data['win'] != 0]

# Extract unique names
columns_to_encode = [
    'p1a_form', 'p1b_form', 'p1c_form', 'p1d_form',
    'p2a_form', 'p2b_form', 'p2c_form', 'p2d_form'
]
unique_names = pd.unique(filtered_data[columns_to_encode].values.ravel('K'))

# Assign a unique number to each name
name_to_number = {name: idx for idx, name in enumerate(unique_names, start=1)}

# Encode the columns using the dictionary
for col in columns_to_encode:
    filtered_data[col] = filtered_data[col].map(name_to_number)
    
# Manually perform aggregation
# Initialize lists to store the results
battle_ids = []
p1_forms = []
p2_forms = []
wins = []
total_turns = []

# Iterate over grouped data and aggregate
for battle_id, group in filtered_data.groupby('battle_id'):
    battle_ids.append(battle_id)
    total_turns.append(group[['total_turn']].values.flatten().tolist())
    p1_forms.append(group[['p1a_form', 'p1b_form', 'p1c_form', 'p1d_form']].values.flatten().tolist())
    p2_forms.append(group[['p2a_form', 'p2b_form', 'p2c_form', 'p2d_form']].values.flatten().tolist())
    wins.append(group['win'].iloc[0])

# Create a new DataFrame with the aggregated data
aggregated_data = pd.DataFrame({
    'battle_id': battle_ids,
    'p1_forms': p1_forms,
    'p2_forms': p2_forms,
    'win': wins,
    'total_turn': total_turns
})

# Define a function to remove duplicates
def remove_duplicates(forms_list):
    return list(dict.fromkeys(forms_list))

# Remove duplicates from p1_forms and p2_forms
aggregated_data['p1_forms'] = aggregated_data['p1_forms'].apply(remove_duplicates)
aggregated_data['p2_forms'] = aggregated_data['p2_forms'].apply(remove_duplicates)

# Convert list columns to fixed-length vectors, padding with 0 if necessary
max_length = max(aggregated_data['p1_forms'].apply(len).max(), aggregated_data['p2_forms'].apply(len).max())

def pad_list(forms_list, length):
    return forms_list + [0] * (length - len(forms_list))

aggregated_data['p1_forms'] = aggregated_data['p1_forms'].apply(lambda x: pad_list(x, max_length))
aggregated_data['p2_forms'] = aggregated_data['p2_forms'].apply(lambda x: pad_list(x, max_length))

# Convert feature columns to vector format
X = pd.concat([pd.DataFrame(aggregated_data['p1_forms'].tolist()), pd.DataFrame(aggregated_data['p2_forms'].tolist())], axis=1)
y = aggregated_data['win']

In [None]:
aggregated_data['total_turn']

In [None]:
import numpy as np

In [None]:
# define a function to get unique values
def get_unique(arr):
    return np.unique(arr)

# apply to every row of the dataframe
aggregated_data['total_turn'] = aggregated_data['total_turn'].apply(get_unique)

In [None]:
aggregated_data

In [None]:
X = pd.concat([pd.DataFrame(aggregated_data['p1_forms'].tolist()), pd.DataFrame(aggregated_data['p2_forms'].tolist()),pd.DataFrame(aggregated_data['total_turn'].tolist())+len(unique_names)+1], axis=1)
Z= pd.concat([pd.DataFrame(aggregated_data['p1_forms'].tolist()), pd.DataFrame(aggregated_data['p2_forms'].tolist()),pd.DataFrame(aggregated_data['total_turn'].tolist())], axis=1)
X

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
# Initialize the Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=500, max_depth=200, random_state=100)

# Train the classifier
rf_classifier.fit(X_train, y_train)

# Make predictions
y_pred = rf_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)


In [None]:
et = ExtraTreesClassifier(n_estimators=500, max_depth=200)

et.fit(X_train, y_train)
y_pred = et.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')