In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

# 識別子の作成
from sklearn.base import BaseEstimator, TransformerMixin

In [2]:
# Transformer that subtracts the per-column mean
class SimpleConverter(BaseEstimator, TransformerMixin):
    """Center the columns of a DataFrame on the means learned during ``fit``.

    The only fitted state is ``x_mean``: the column-wise mean of the frame
    passed to ``fit``. ``transform`` keeps no buffer on the estimator, so every
    call returns a fresh DataFrame and may receive a frame with a different
    number of rows (or a different index) than the one used for fitting.
    """

    def __init__(self):
        # No hyper-parameters; ``x_mean`` stays None until ``fit`` has run.
        self.x_mean = None

    def fit(self, X, y=None):
        """Learn the column-wise mean of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Training data; one mean is computed per column.
        y : ignored
            Accepted only so the signature matches the usual ``fit(X, y)`` call.

        Returns
        -------
        SimpleConverter
            ``self``, so calls can be chained.
        """
        self.x_mean = X.mean(axis=0)
        return self

    def transform(self, X):
        """Return a new DataFrame equal to ``X`` minus the fitted column means.

        Parameters
        ----------
        X : pandas.DataFrame
            Data to center. Every column of ``X`` must have been present in the
            frame given to ``fit``.

        Returns
        -------
        pandas.DataFrame
            Same index and columns as ``X``, holding ``X[col] - x_mean[col]``.

        Raises
        ------
        AttributeError
            If ``fit`` has not been called yet.
        KeyError
            If ``X`` has a column for which no mean was learned.
        """
        if self.x_mean is None:
            raise AttributeError(
                "SimpleConverter is not fitted yet; call fit() before transform()."
            )
        # Select the means by label so an unknown column raises instead of
        # silently yielding an all-NaN column; the subtraction then aligns on
        # column names and is applied to whole columns at once (no Python loop).
        column_means = self.x_mean.loc[X.columns]
        return X - column_means

In [30]:
# Load the California housing dataset and wrap it in pandas containers:
# the feature matrix becomes a DataFrame with named columns, the target a Series.
california_housing = fetch_california_housing()
df_X = pd.DataFrame(
    data=california_housing.data,
    columns=california_housing.feature_names,
)
df_y = pd.Series(data=california_housing.target)

# Preview the first rows of the features, then of the target.
print(df_X.head(), df_y.head(), sep="\n")

   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   

   Longitude  
0    -122.23  
1    -122.22  
2    -122.24  
3    -122.25  
4    -122.25  
0    4.526
1    3.585
2    3.521
3    3.413
4    3.422
dtype: float64


In [4]:
# Per-column mean of the feature frame (one value per feature).
df_X.mean(axis="index")

MedInc           3.870671
HouseAge        28.639486
AveRooms         5.429000
AveBedrms        1.096675
Population    1425.476744
AveOccup         3.070655
Latitude        35.631861
Longitude     -119.569704
dtype: float64

In [6]:
# Instantiate the converter and fit it on the feature frame in one step
# (fit returns the estimator itself, so the call can be chained).
mean_model = SimpleConverter().fit(df_X)

# Apply the fitted converter to the same frame and print the result.
X_mean = mean_model.transform(df_X)
print(X_mean)

         MedInc   HouseAge  AveRooms  AveBedrms   Population  AveOccup  \
0      4.454529  12.360514  1.555127  -0.072866 -1103.476744 -0.515100   
1      4.430729  -7.639486  0.809137  -0.124795   975.523256 -0.960813   
2      3.386729  23.360514  2.859136  -0.023229  -929.476744 -0.268395   
3      1.772429  23.360514  0.388352  -0.023616  -867.476744 -0.522710   
4     -0.024471  23.360514  0.852854  -0.015594  -860.476744 -0.889188   
...         ...        ...       ...        ...          ...       ...   
20635 -2.310371  -3.639486 -0.383545   0.036658  -580.476744 -0.510049   
20636 -1.313871 -10.639486  0.685035   0.219114 -1069.476744  0.052152   
20637 -2.170671 -11.639486 -0.223457   0.023417  -418.476744 -0.745020   
20638 -2.003471 -10.639486 -0.099487   0.075245  -684.476744 -0.947446   
20639 -1.482071 -12.639486 -0.174283   0.065589   -38.476744 -0.453674   

       Latitude  Longitude  
0      2.248139  -2.660296  
1      2.228139  -2.650296  
2      2.218139  -2.6702

In [7]:
# SimpleConverter does not define fit_transform itself; the method comes from
# its sklearn base classes (TransformerMixin). This call refits on df_X and
# rebinds X_mean to the transformed frame.
X_mean = mean_model.fit_transform(df_X)
# Bare last expression so the notebook renders the frame as the cell output.
X_mean

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,4.454529,12.360514,1.555127,-0.072866,-1103.476744,-0.515100,2.248139,-2.660296
1,4.430729,-7.639486,0.809137,-0.124795,975.523256,-0.960813,2.228139,-2.650296
2,3.386729,23.360514,2.859136,-0.023229,-929.476744,-0.268395,2.218139,-2.670296
3,1.772429,23.360514,0.388352,-0.023616,-867.476744,-0.522710,2.218139,-2.680296
4,-0.024471,23.360514,0.852854,-0.015594,-860.476744,-0.889188,2.218139,-2.680296
...,...,...,...,...,...,...,...,...
20635,-2.310371,-3.639486,-0.383545,0.036658,-580.476744,-0.510049,3.848139,-1.520296
20636,-1.313871,-10.639486,0.685035,0.219114,-1069.476744,0.052152,3.858139,-1.640296
20637,-2.170671,-11.639486,-0.223457,0.023417,-418.476744,-0.745020,3.798139,-1.650296
20638,-2.003471,-10.639486,-0.099487,0.075245,-684.476744,-0.947446,3.798139,-1.750296


# Adstockの変換class

## 定式: $y_t = x_t + \alpha \, y_{t-1}$

In [None]:
# Single-column frame holding a unit impulse: one 1 followed by four 0s.
x = pd.DataFrame([1] + [0] * 4)

In [18]:
def adstock(x, alpha=0.5):
    """Apply the geometric adstock recurrence ``y_t = x_t + alpha * y_{t-1}``.

    The recurrence runs along the first axis (rows), independently for every
    column, with the carry-over before the first row taken as 0.

    Parameters
    ----------
    x : pandas.DataFrame, pandas.Series or array-like
        Input series; rows are consecutive time steps. Must be at least 1-D.
    alpha : float, default 0.5
        Carry-over rate applied to the previous output value.

    Returns
    -------
    Same container type as ``x`` (DataFrame / Series keep their index and
    labels; anything else comes back as a float ``numpy.ndarray``).
    """
    values = np.asarray(x, dtype=float)
    result = np.empty_like(values)

    # carry holds y_{t-1}; it starts at zero so that y_0 == x_0.
    carry = np.zeros(values.shape[1:], dtype=float)
    for t in range(values.shape[0]):
        carry = values[t] + alpha * carry
        result[t] = carry

    # Hand the result back in the same kind of container the caller passed in.
    if isinstance(x, pd.DataFrame):
        return pd.DataFrame(result, index=x.index, columns=x.columns)
    if isinstance(x, pd.Series):
        return pd.Series(result, index=x.index, name=x.name)
    return result

MedInc           3.870671
HouseAge        28.639486
AveRooms         5.429000
AveBedrms        1.096675
Population    1425.476744
AveOccup         3.070655
Latitude        35.631861
Longitude     -119.569704
dtype: float64