In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Toy training set: one row per character, with the regression target CombatPower.
train = pd.DataFrame({
    'Level': [10, 25, 40, 15, 50, 30],
    'Class': ['Warrior', 'Mage', 'Necromancer', 'Warrior', 'Mage', 'Necromancer'],
    'Weapon': ['Common', 'Rare', 'Legendary', 'Common', 'Rare', 'Legendary'],
    'ElementCode': [1, 2, 3, 1, 2, 3],  # 1: fire, 2: ice, 3: poison (numeric, but categorical!)
    'CombatPower': [1500, 3200, 8500, 1800, 6000, 7200]
})

# Toy test set: same feature columns, no CombatPower column.
test = pd.DataFrame({
    'Level': [12, 28, 18],
    'Class': ['Warrior', 'Mage', 'Warrior'],
    'Weapon': ['Common', 'Rare', 'Common'],
    'ElementCode': [1, 2, 1]  # no user with element 3 (poison)
})


def labeled_frame_text(label, frame):
    """Return ``label`` on its own line followed by the text rendering of ``frame``.

    Passing a label that ends in a newline and the frame as two separate
    ``print`` arguments makes ``print`` insert its default single-space
    separator right after the newline, which shifts the frame's header row
    one column to the right of its data rows. Joining the pieces here keeps
    the header aligned.

    Parameters
    ----------
    label : str
        Heading shown above the frame.
    frame : pandas.DataFrame
        Frame to render; it is formatted with ``str(frame)``, i.e. the same
        text ``print(frame)`` would emit.

    Returns
    -------
    str
        The label, a newline, then the rendered frame.
    """
    return f"{label}\n{frame}"


# Only the categorical columns are previewed here.
preview_cols = ['Class', 'Weapon', 'ElementCode']

print("=== 1. 원본 데이터 확인 ===")
print(labeled_frame_text("[Train]", train[preview_cols]))
print(labeled_frame_text("[Test]", test[preview_cols]))


=== 1. 원본 데이터 확인 ===
[Train]
          Class     Weapon  ElementCode
0      Warrior     Common            1
1         Mage       Rare            2
2  Necromancer  Legendary            3
3      Warrior     Common            1
4         Mage       Rare            2
5  Necromancer  Legendary            3
[Test]
      Class  Weapon  ElementCode
0  Warrior  Common            1
1     Mage    Rare            2
2  Warrior  Common            1


In [2]:

# Tag every row with the split it came from, then stack both splits into one
# frame with a fresh 0..n-1 index. The 'source' column is what allows the rows
# to be separated again after encoding.
tagged_frames = [
    frame.assign(source=label)
    for label, frame in (('train', train), ('test', test))
]
combined_df = pd.concat(tagged_frames, ignore_index=True)

print("=== Combined Data Before One-Hot Encoding ===")
display(combined_df)

=== Combined Data Before One-Hot Encoding ===


Unnamed: 0,Level,Class,Weapon,ElementCode,CombatPower,source
0,10,Warrior,Common,1,1500.0,train
1,25,Mage,Rare,2,3200.0,train
2,40,Necromancer,Legendary,3,8500.0,train
3,15,Warrior,Common,1,1800.0,train
4,50,Mage,Rare,2,6000.0,train
5,30,Necromancer,Legendary,3,7200.0,train
6,12,Warrior,Common,1,,test
7,28,Mage,Rare,2,,test
8,18,Warrior,Common,1,,test


In [3]:
# One-hot encode the categorical columns of the combined frame. ElementCode is
# stored as integers, so it is listed explicitly to be encoded as a category.
# drop_first=True leaves out the first level of each encoded column.
categorical_cols = ['Class', 'Weapon', 'ElementCode']
dummy_df = pd.get_dummies(combined_df, columns=categorical_cols, drop_first=True)

print("=== Data After One-Hot Encoding ===")
display(dummy_df)

# Split the one-hot encoded data back into train and test (optional).
# Both splits come from the same encoded frame, so they share identical columns.
is_train = dummy_df['source'] == 'train'
is_test = dummy_df['source'] == 'test'
train_encoded = dummy_df.loc[is_train].drop(columns=['source'])
test_encoded = dummy_df.loc[is_test].drop(columns=['source'])

for heading, encoded in (
    ("\n=== Encoded Train Data ===", train_encoded),
    ("\n=== Encoded Test Data ===", test_encoded),
):
    print(heading)
    display(encoded)

=== Data After One-Hot Encoding ===


Unnamed: 0,Level,CombatPower,source,Class_Necromancer,Class_Warrior,Weapon_Legendary,Weapon_Rare,ElementCode_2,ElementCode_3
0,10,1500.0,train,False,True,False,False,False,False
1,25,3200.0,train,False,False,False,True,True,False
2,40,8500.0,train,True,False,True,False,False,True
3,15,1800.0,train,False,True,False,False,False,False
4,50,6000.0,train,False,False,False,True,True,False
5,30,7200.0,train,True,False,True,False,False,True
6,12,,test,False,True,False,False,False,False
7,28,,test,False,False,False,True,True,False
8,18,,test,False,True,False,False,False,False



=== Encoded Train Data ===


Unnamed: 0,Level,CombatPower,Class_Necromancer,Class_Warrior,Weapon_Legendary,Weapon_Rare,ElementCode_2,ElementCode_3
0,10,1500.0,False,True,False,False,False,False
1,25,3200.0,False,False,False,True,True,False
2,40,8500.0,True,False,True,False,False,True
3,15,1800.0,False,True,False,False,False,False
4,50,6000.0,False,False,False,True,True,False
5,30,7200.0,True,False,True,False,False,True



=== Encoded Test Data ===


Unnamed: 0,Level,CombatPower,Class_Necromancer,Class_Warrior,Weapon_Legendary,Weapon_Rare,ElementCode_2,ElementCode_3
6,12,,False,True,False,False,False,False
7,28,,False,False,False,True,True,False
8,18,,False,True,False,False,False,False


In [7]:
# Separate the features from the regression target.
target_col = 'CombatPower'
X_train = train_encoded.drop(columns=[target_col])
# The target is kept as a one-column DataFrame (2-D), so the predictions below
# come back with one column per row rather than as a flat vector.
y_train = train_encoded[[target_col]]
# The test split still carries the target column from the concat step; drop it
# so the feature columns line up with X_train.
X_test = test_encoded.drop(columns=[target_col])

# fit() returns the estimator itself, so construction and fitting can be chained.
lireg = LinearRegression().fit(X_train, y_train)
y_pred = lireg.predict(X_test)

print('y_pred 출력결과', '-----------------', y_pred, sep='\n')

y_pred 출력결과
-----------------
[[1593.66666667]
 [3529.66666667]
 [2269.66666667]]
