# preprocessing

In [27]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [20]:
'''load data'''
def loaddata(path):
    """Read the Excel sheet at ``path`` and give its columns short ASCII names.

    The first row of the sheet is taken as the header. Columns whose header
    matches one of the keys in ``column_aliases`` are renamed to the
    corresponding value; any other column keeps its name.

    Parameters
    ----------
    path : str
        Location of the Excel workbook, passed straight to ``pd.read_excel``.

    Returns
    -------
    pandas.DataFrame
        The renamed frame with a fresh 0..n-1 index.
    """
    column_aliases = {
        'C（wt.%)': 'C',
        'T(℃)': 'T',
        'σγ(MPa)': 'sigma',
        'ΔGγ→α(J)': 'dG',
        'Wαβ(μm)': 'W',
    }
    raw = pd.read_excel(path, header=0)
    renamed = raw.rename(columns=column_aliases)
    # Discard whatever index the sheet produced and renumber the rows.
    return renamed.reset_index(drop=True)

'''调整数据'''
def null_filled(df):
    """Return the numeric modelling columns of ``df`` with gaps replaced by 0.

    Steps, in order:

    1. Keep the label-based column slice ``'T'`` through ``'W'`` (inclusive).
    2. Coerce every kept column to a numeric dtype; cells that cannot be
       parsed become NaN.
    3. Multiply ``W`` by 1000 (presumably a unit change from the sheet's
       original unit -- confirm against the data source).
    4. Replace every NaN with 0.

    Coercion happens *before* the multiplication on purpose: multiplying an
    object-dtype column first would apply Python string repetition to any
    value stored as text (``'0.5' * 1000``), which then fails to parse and is
    silently turned into 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing columns ``'T'`` ... ``'W'`` in that order. It is not
        modified.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the ``'T'``..``'W'`` columns, all numeric.
    """
    # ``apply`` builds a new frame, so the assignment below never writes into
    # a view of the caller's data (no SettingWithCopyWarning).
    numeric = df.loc[:, 'T':'W'].apply(pd.to_numeric, errors='coerce')
    numeric['W'] = numeric['W'] * 1000
    # NOTE(review): missing / unparseable cells become 0, which is a real
    # value to the downstream model -- confirm this is the intended imputation.
    return numeric.fillna(value=0)

'''preprocessing'''
def scale(df, test_size=0.3, random_state=2):
    """Split ``df`` into train/test sets and standardise the feature columns.

    The first three columns of ``df`` are used as features and the fourth as
    the regression target. The scaler is fitted on the training features only
    and then applied to both splits, so no test-set statistics leak into
    training. Targets are returned unscaled.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame with at least four columns (features first, target
        fourth).
    test_size : float, optional
        Fraction of rows held out for testing. Defaults to 0.3.
    random_state : int, optional
        Seed for the shuffle performed by ``train_test_split``. Defaults to 2.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` as NumPy arrays.
    """
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` yields
    # the same ndarray and exists on every pandas version.
    array = df.values
    features, target = array[:, 0:3], array[:, 3]
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state)

    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    # NOTE(review): the fitted scaler is not returned, so new samples cannot
    # be standardised with the same statistics later on.
    return x_train, x_test, y_train, y_test

# Run the preprocessing pipeline end to end: load the sheet, clean it, then
# split and standardise. ``data`` ends up as the 4-tuple returned by scale():
# (x_train, x_test, y_train, y_test).
source_path = r'E:\文档\贝氏体板条宽度W公式拟合.xlsx'
df1 = loaddata(source_path)
df2 = null_filled(df1)
data = scale(df2)

In [37]:

'''linear regression'''

# ``data`` is (x_train, x_test, y_train, y_test) as returned by scale().
linear = LinearRegression()
linear.fit(data[0], data[2])

# Predict once and reuse the result for both error metrics.
linear_pred = linear.predict(data[1])

print(linear.score(data[1], data[3]))  # R^2 on the held-out split
print(linear.coef_,'\n',linear.intercept_)
# scikit-learn metrics take (y_true, y_pred). MAE and MSE happen to be
# symmetric, but keeping the documented order avoids wrong results if an
# asymmetric metric (e.g. r2_score) is swapped in later.
print(mean_absolute_error(data[3], linear_pred))
print(mean_squared_error(data[3], linear_pred))

0.862258816317
[ 26.62819329 -13.74803428  -3.13304876] 
 73.4864864865
11.8384629499
193.202994124


# Neural network (NN)

In [4]:
import torch
import torch.nn as nn 
import torch.nn.functional as F 
from torch.autograd import Variable

In [45]:

'''data '''

# Features: NumPy arrays -> float32 CPU tensors.
x_train = torch.from_numpy(data[0]).float()
x_test = torch.from_numpy(data[1]).float()

# Targets: same conversion, plus an extra axis at dim=1 so each target is a
# one-element row, matching the network's single-column output.
y_train = torch.from_numpy(data[2]).float().unsqueeze(1)
y_test = torch.from_numpy(data[3]).float().unsqueeze(1)
'''variablize'''
# Only the training tensors are wrapped here; x_test is wrapped at the point
# where it is fed to the network.
x_train, y_train = Variable(x_train), Variable(y_train)

'''net'''
class Net(torch.nn.Module):
    """Fully connected regressor with two tanh hidden layers.

    Architecture: ``Linear(n_in, n_h1) -> Tanh -> Linear(n_h1, n_h2) -> Tanh
    -> Linear(n_h2, n_out)``. The output layer has no activation.

    Parameters
    ----------
    n_h1 : int
        Width of the first hidden layer.
    n_h2 : int
        Width of the second hidden layer.
    n_in : int, optional
        Number of input features. Defaults to 3.
    n_out : int, optional
        Number of outputs per sample. Defaults to 1.

    The sub-module names (``linear1``, ``linear2``, ``predict``) determine the
    ``state_dict`` keys, so they must stay stable for saved checkpoints to
    remain loadable.
    """

    def __init__(self, n_h1, n_h2, n_in=3, n_out=1):
        super().__init__()
        self.linear1 = nn.Sequential(
            nn.Linear(n_in, n_h1),
            nn.Tanh())
        self.linear2 = nn.Sequential(
            nn.Linear(n_h1, n_h2),
            nn.Tanh())
        self.predict = nn.Linear(n_h2, n_out)

    def forward(self, x):
        """Map a batch ``x`` whose last dimension is ``n_in`` to ``n_out`` values per row."""
        hidden = self.linear1(x)
        hidden = self.linear2(hidden)
        return self.predict(hidden)

# Seed PyTorch's RNG before the network is built so the random weight
# initialisation -- and therefore the trained model and its reported error --
# is the same on every Restart & Run All. Without a seed the numbers printed
# below change from run to run.
torch.manual_seed(0)

net1 = Net(16, 64)
# print(net1)

'''optimizer'''
optimizer = torch.optim.Adam(net1.parameters(), lr=0.1)
'''loss funciton'''
loss_func = torch.nn.MSELoss()

'''iteration'''
# Full-batch training: every step uses the whole training set.
for k in range(500):
    prediction = net1(x_train)
    loss = loss_func(prediction, y_train)

    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

#     if k%50 ==0:
#         print(loss.data[0])

'''test'''
y_predict = net1(Variable(x_test))

# Convert both sides to flat NumPy arrays once and reuse them for each metric.
y_pred_np = y_predict.data.numpy().squeeze()
y_true_np = y_test.numpy().squeeze()

# scikit-learn metrics take (y_true, y_pred). MAE and MSE are symmetric, so
# the printed values are unchanged, but the documented order keeps this
# correct if another metric is added.
MAE = mean_absolute_error(y_true_np, y_pred_np)
MSE = mean_squared_error(y_true_np, y_pred_np)
print(MAE,'\n',MSE)

'''get parameters'''
#     for name,param in net1.named_parameters():
#         if param.requires_grad:
#             print(name,param.data)

#     for param in net1.parameters():
#         print(param)
'''save model'''    
# Persist only the learned parameters (the state_dict), not the module object;
# loading therefore requires rebuilding Net with the same layer sizes.
checkpoint_path = r'E:\文档\net1.pkl'
torch.save(net1.state_dict(), checkpoint_path)




6.71103 
 78.2027


# load model

In [46]:
# Rebuild the architecture with the same hidden sizes used for net1, then
# restore the saved weights into it.
net2 = Net(16, 64)
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints from a trusted source.
net2.load_state_dict(torch.load(r'E:\文档\net1.pkl'))
y_predict = net2(Variable(x_test))

# Convert both sides to flat NumPy arrays once and reuse them for each metric.
y_pred_np = y_predict.data.numpy().squeeze()
y_true_np = y_test.numpy().squeeze()

# scikit-learn metrics take (y_true, y_pred); the values are unchanged for
# these symmetric metrics, but the documented order is kept for safety.
MAE = mean_absolute_error(y_true_np, y_pred_np)
MSE = mean_squared_error(y_true_np, y_pred_np)
print(MAE,'\n',MSE)


6.71103 
 78.2027
