In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
sns.set(context="notebook", style="darkgrid")
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [2]:
train=pd.read_csv("G:/compete/ai/city_rent_precent/data_set/train_data.csv")
test=pd.read_csv("G:/compete/ai/city_rent_precent/data_set/test_a.csv")

In [3]:
train = train.drop(["ID","city"],axis = 1)
test= test.drop(["ID","city"],axis = 1)


In [4]:
for df in [train,test]:
    df['rentType'][df['rentType']=='--']='未知方式'
    #转换object数据类型
    columns=['rentType', 'houseFloor', 'houseToward', 'houseDecoration', 'communityName', 'region', 'plate']
    for col in columns:
        df[col]=df[col].astype('category')
        

In [5]:
for df in [train,test]:
    #将bulidyear列转换为整型数据
    tmp = df['buildYear'].copy()
    tmp2 = tmp[tmp!='暂无信息'].astype('int')
    tmp[tmp=='暂无信息'] = tmp2.mode().iloc[0]
    df['buildYear'] = tmp
    df['buildYear'] = df['buildYear'].astype('int')
    #处理pv/uv的空值
    df['pv'].fillna(df['pv'].mean(),inplace=True)
    df['uv'].fillna(df['uv'].mean(),inplace=True)
    df['pv'] = df['pv'].astype('int')
    df['uv'] = df['uv'].astype('int')
    

In [6]:
for df in [train,test]:
    # 将houseType转化为‘房间数’，‘厅数’，‘卫生间数’
    def parseRoom(info, index):
        res = int(info[index*2])
        return res
    df.insert(3,'室',None)
    df.insert(4, '厅', None)
    df.insert(5, '卫', None)
    df['室'] = df['houseType'].apply(parseRoom, index=0)
    df['厅'] = df['houseType'].apply(parseRoom, index=1)
    df['卫'] = df['houseType'].apply(parseRoom, index=2)
    
    df['交易月份'] = df['tradeTime'].apply(lambda x: int(x.split('/')[1]))
#     df['pv/uv'] = df['pv'] / df['uv']
#     df['房间总数'] = df['室'] + df['厅'] + df['卫']
    
    df.drop('houseType', axis=1, inplace=True)
    df.drop('tradeTime', axis=1, inplace=True)
    
    

# 简单数据清洗

In [7]:
train = train[train['area']<=700]
train = train[train['tradeMoney']<=100000]

# new1 删除离群点

In [8]:
train = train[train["area"]<400]
train = train[(train["area"]<400)&(train["tradeMoney"]<60000)]
train = train.drop(train[(train['area']<130)&(train["tradeMoney"]>40000)].index)
train = train.drop(train[(train['area']>350)&(train["tradeMoney"]<5000)].index)
train = train.drop(train[train["tradeMoney"]<1].index)
train= train.drop(train[train["area"]<10].index)

# new2 删除test中没有的值

In [9]:
train = train.drop(train[train['室']==0].index)
train = train.drop(train[train['室']==7].index)
train = train.drop(train[train['室']==8].index)
train = train.drop(train[train['室']==9].index)

train = train.drop(train[train['厅']==4].index)
train = train.drop(train[train['厅']==5].index)

train = train.drop(train[train['卫']==8].index)
train = train.drop(train[train['卫']==7].index)
train = train.drop(train[train['卫']==5].index)

# new3 删除test中没有的值

In [10]:
train = train.drop(train[train['region']=="RG00015"].index)

# new4 删除test中没有的值

In [11]:
trian = train.drop(train[(train["plate"] == "BK00058")|(train["plate"] == "BK00032")].index)

# 特征工程


In [12]:
trian.head()

Unnamed: 0,area,rentType,室,厅,卫,houseFloor,totalFloor,houseToward,houseDecoration,communityName,...,landTotalPrice,landMeanPrice,totalWorkers,newWorkers,residentPopulation,pv,uv,lookNum,tradeMoney,交易月份
0,68.06,未知方式,2,1,1,低,16,暂无数据,其他,XQ00051,...,0,0.0,28248,614,111546,1124,284,0,2000.0,11
1,125.55,未知方式,3,2,2,中,14,暂无数据,简装,XQ00130,...,0,0.0,14823,148,157552,701,22,1,2000.0,12
2,132.0,未知方式,3,2,2,低,32,暂无数据,其他,XQ00179,...,0,0.0,77645,520,131744,57,20,1,16000.0,12
3,57.0,未知方式,1,1,1,中,17,暂无数据,精装,XQ00313,...,332760000,3080.0331,8750,1665,253337,888,279,9,1600.0,12
4,129.0,未知方式,3,2,3,低,2,暂无数据,毛坯,XQ01257,...,0,0.0,800,117,125309,2038,480,0,2900.0,11


In [13]:
train["HomeNum"] = train["室"] + train["卫"] + train["厅"]

In [14]:
train['HomeNum'].head()

0    4
1    7
2    7
3    3
4    8
Name: HomeNum, dtype: int64

In [15]:

train["室"].isnull().sum()

0

## 成交均价

In [16]:
trian["tradeMean"] = train["totalTradeMoney"]/train["totalTradeArea"]
trian["tradeNewMean"] = train["totalNewTradeMoney"]/train["totalNewTradeArea"]
trian["landMean"] = train["landTotalPrice"]/train["tradeLandArea"]

## 总数量

In [17]:
trian["bus"] = train["subwayStationNum"] + train["busStationNum"]
train["school"]  = train["interSchoolNum"]+train["schoolNum"]+train["privateSchoolNum"]
trian["docter"] = train["hospitalNum"] + train["drugStoreNum"]
trian["shop"] = train["shopNum"] + train["superMarketNum"]+ train["mallNum"]
trian["sport"] = train["gymNum"] + train["parkNum"]

In [18]:
test["HomeNum"] = test["室"] + test["卫"] + test["厅"]
test["tradeMean"] = test["totalTradeMoney"]/test["totalTradeArea"]
test["tradeNewMean"] = test["totalNewTradeMoney"]/test["totalNewTradeArea"]
test["landMean"] = test["landTotalPrice"]/test["tradeLandArea"]
test["bus"] = test["subwayStationNum"] + test["busStationNum"]
test["school"]  = test["interSchoolNum"]+test["schoolNum"]+test["privateSchoolNum"]
test["docter"] = test["hospitalNum"] + test["drugStoreNum"]
test["shop"] = test["shopNum"] + test["superMarketNum"]+ test["mallNum"]
test["sport"] = test["gymNum"] + test["parkNum"]

In [19]:
train.shape

(40142, 53)

In [20]:
columns = ['rentType', 'houseFloor', 'houseToward', 'houseDecoration', 'region', 'plate']
for col in columns:
    train[col] = train[col].astype('category')
    test[col] = test[col].astype('category')

In [21]:
for col in train.columns:
    if train[col].dtype.name == "category":
        print(col)

rentType
houseFloor
houseToward
houseDecoration
communityName
region
plate


# 特征选择

## Filter（过滤法）

### 这类方法先对数据机进行特征选择，然后再训练学习器，特征选择的过程与后续学习器无关。

- Chi-squared test（卡方检验）
- 值越大，越相关，设置阈值保留相关特征
- 只适用于分类问题中离散型特征筛选，不能用于分类问题中连续型特征的筛选，也不能用于回归问题的特征筛选
- 特征不能是字符型
- 特征值不能太大
- 索引找到列名

In [22]:

from sklearn.feature_selection import SelectKBest,SelectPercentile
from sklearn.feature_selection import chi2

In [23]:
X = train.drop(["tradeMoney"],axis=1)
y = train["tradeMoney"]

In [24]:
# 去掉字符型特征
for col in X.columns:
    if X[col].dtype.name == "category":
        X = X.drop([col],axis=1)

In [25]:
X_new = SelectKBest(chi2, k=43).fit(X, y).get_support(indices = True)

# Wrapper（封装法）

## 直接把最后要使用的分类器作为特征选择的评价函数，对于特定的分类器选择最优的特征子集。
- 递归特征消除（Recursive feature elimination，RFE）
- 不能有字符串类型

In [26]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
lr = LinearRegression()

In [27]:
rfe = RFE(lr, n_features_to_select=40)

In [28]:

X = train.drop(["tradeMoney"],axis=1)
y = train["tradeMoney"]

In [29]:
# 去掉字符型特征
for col in X.columns:
    if X[col].dtype.name == "category":
        X = X.drop([col],axis=1)
X_columns = X.columns

In [30]:
rfe.fit(X,y)

RFE(estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False),
  n_features_to_select=40, step=1, verbose=0)

In [31]:
print("Features sorted by their rank:")
print(sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), X.columns)))

Features sorted by their rank:
[(1, 'HomeNum'), (1, 'area'), (1, 'bankNum'), (1, 'buildYear'), (1, 'busStationNum'), (1, 'drugStoreNum'), (1, 'gymNum'), (1, 'hospitalNum'), (1, 'interSchoolNum'), (1, 'landMeanPrice'), (1, 'lookNum'), (1, 'mallNum'), (1, 'newWorkers'), (1, 'parkNum'), (1, 'privateSchoolNum'), (1, 'remainNewNum'), (1, 'residentPopulation'), (1, 'saleSecHouseNum'), (1, 'school'), (1, 'schoolNum'), (1, 'shopNum'), (1, 'subwayStationNum'), (1, 'superMarketNum'), (1, 'supplyLandArea'), (1, 'supplyLandNum'), (1, 'supplyNewNum'), (1, 'totalFloor'), (1, 'totalNewTradeArea'), (1, 'totalTradeArea'), (1, 'totalWorkers'), (1, 'tradeLandNum'), (1, 'tradeMeanPrice'), (1, 'tradeNewMeanPrice'), (1, 'tradeNewNum'), (1, 'tradeSecNum'), (1, 'uv'), (1, '交易月份'), (1, '卫'), (1, '厅'), (1, '室'), (2, 'tradeLandArea'), (3, 'pv'), (4, 'totalTradeMoney'), (5, 'totalNewTradeMoney'), (6, 'landTotalPrice')]


In [32]:
rfe.ranking_,rfe.n_features_

(array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
        1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 2, 6, 1, 1, 1, 1, 3, 1, 1, 1, 1,
        1]), 40)

In [33]:
print(rfe.support_)

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True False  True  True  True
 False  True  True  True  True  True  True  True  True False False  True
  True  True  True False  True  True  True  True  True]


In [34]:
sel_features = [f for f, s in zip(X_columns, rfe.support_) if s]

In [35]:
train1 = train[sel_features]
test1 = test[sel_features]

In [36]:
categorical_feats = ['rentType', 'houseFloor', 'houseToward', 'houseDecoration', 'region', 'plate']

In [37]:
for col in categorical_feats:
    train2 = pd.concat([train[col],train1],axis = 1)
    test2 = pd.concat([test[col],test1],axis = 1)

# Embedded(嵌入法）


## 固定模型，挑选出对模型训练有重要意义的特征
- 基于惩罚项的特征选择法
- Lasso(l1)和Ridge(l2)

In [38]:
from sklearn.linear_model import Ridge
X = train.drop(["tradeMoney"],axis=1)
y = train["tradeMoney"]
for col in X.columns:
    if X[col].dtype.name == "category":
        X = X.drop([col],axis=1)
X_columns = X.columns
ridge = Ridge(alpha=5)
ridge.fit(X,y)

Ridge(alpha=5, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [39]:
coefSort = ridge.coef_.argsort()

In [40]:
coefSort

array([ 2,  1, 32, 11, 17, 13, 10,  5, 16,  6, 41,  8, 23, 28, 40, 25, 21,
       37, 36, 31, 34, 24, 20, 38, 35, 39, 33, 26, 22, 29, 27, 15, 19, 42,
       44, 14,  0,  7,  4,  9, 30, 12, 18, 43,  3], dtype=int64)

In [41]:
featureCoefSore=ridge.coef_[coefSort]

featureCoefSore

array([-4.38301292e+02, -1.69254244e+02, -1.63740141e+02, -5.87931402e+01,
       -3.71155602e+01, -2.62890837e+01, -1.69722257e+01, -8.02956563e+00,
       -5.94038370e+00, -5.93989474e+00, -4.79050535e+00, -4.33285297e+00,
       -1.36989383e+00, -1.62312942e-01, -6.76635064e-02, -4.74675081e-02,
       -1.37987101e-02, -4.45986872e-03, -2.12023321e-03, -1.22916146e-03,
       -1.15667229e-07,  5.86155038e-07,  9.75775044e-07,  3.93635622e-04,
        1.49620901e-03,  2.38266754e-03,  2.67458704e-03,  3.11321364e-03,
        2.93132690e-02,  4.76457160e-02,  1.08160245e+00,  2.79345587e+00,
        9.84138723e+00,  1.20649950e+01,  2.03858963e+01,  4.27162016e+01,
        5.28124809e+01,  5.53884022e+01,  8.45240152e+01,  9.61512619e+01,
        1.10290792e+02,  1.49584651e+02,  1.92802415e+02,  2.54093725e+02,
        8.61649262e+02])

In [42]:

X_columns[coefSort]   # 从小到达排序的特征

Index(['厅', '室', 'tradeLandNum', 'privateSchoolNum', 'parkNum', 'drugStoreNum',
       'schoolNum', 'buildYear', 'shopNum', 'saleSecHouseNum', 'lookNum',
       'busStationNum', 'tradeSecNum', 'remainNewNum', 'uv',
       'totalNewTradeArea', 'totalTradeArea', 'newWorkers', 'totalWorkers',
       'supplyLandArea', 'landTotalPrice', 'totalNewTradeMoney',
       'totalTradeMoney', 'residentPopulation', 'landMeanPrice', 'pv',
       'tradeLandArea', 'tradeNewMeanPrice', 'tradeMeanPrice', 'supplyNewNum',
       'tradeNewNum', 'bankNum', 'superMarketNum', '交易月份', 'school', 'gymNum',
       'area', 'subwayStationNum', 'totalFloor', 'interSchoolNum',
       'supplyLandNum', 'hospitalNum', 'mallNum', 'HomeNum', '卫'],
      dtype='object')

In [43]:
sel_features = [f for f, s in zip(X_columns, featureCoefSore) if abs(s)> 2 ] # 选择绝对值大于二的特征
train3 = train[sel_features]
test3 = test[sel_features]

In [44]:

for col in categorical_feats:
    train3 = pd.concat([train[col],train3],axis = 1)
    test3 = pd.concat([test[col],test3],axis = 1)

 - 基于树模型的特征选择法
- 随机森林 平均不纯度减少（mean decrease impurity

In [45]:
X = train.drop(["tradeMoney"],axis=1)
y = train["tradeMoney"]
for col in X.columns:
    if X[col].dtype.name == "category":
        X = X.drop([col],axis=1)
X_columns = X.columns

In [46]:
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor()
# 训练随机森林模型，并通过feature_importances_属性获取每个特征的重要性分数。rf = RandomForestRegressor()
rf.fit(X, y)
print("Features sorted by their score:")
print(sorted(zip(map(lambda x: round(x, 4), rf.feature_importances_), X_columns),
             reverse=True))

Features sorted by their score:
[(0.5342, 'area'), (0.1898, 'tradeMeanPrice'), (0.0356, 'totalFloor'), (0.0311, 'busStationNum'), (0.0304, 'tradeNewMeanPrice'), (0.0303, 'buildYear'), (0.0122, 'HomeNum'), (0.011, 'gymNum'), (0.0091, 'totalWorkers'), (0.0087, '厅'), (0.0087, 'bankNum'), (0.0072, 'remainNewNum'), (0.007, 'interSchoolNum'), (0.0058, 'privateSchoolNum'), (0.0054, 'totalTradeMoney'), (0.0051, 'totalTradeArea'), (0.0048, '室'), (0.0048, 'tradeSecNum'), (0.0047, '交易月份'), (0.0045, '卫'), (0.0043, 'pv'), (0.0039, 'uv'), (0.0039, 'residentPopulation'), (0.0036, 'parkNum'), (0.0035, 'saleSecHouseNum'), (0.0034, 'totalNewTradeMoney'), (0.0034, 'totalNewTradeArea'), (0.0032, 'mallNum'), (0.0031, 'superMarketNum'), (0.0024, 'tradeNewNum'), (0.0024, 'shopNum'), (0.0021, 'newWorkers'), (0.0019, 'school'), (0.0017, 'subwayStationNum'), (0.0014, 'hospitalNum'), (0.0013, 'schoolNum'), (0.001, 'lookNum'), (0.001, 'drugStoreNum'), (0.0009, 'supplyNewNum'), (0.0003, 'supplyLandArea'), (0.0003,

In [47]:
sel_features = [f for f, s in zip(X_columns, rf.feature_importances_) if abs(s)> 0.001 ] # 选择绝对值大于二的特征

train4 = train[sel_features]
test4 = test[sel_features]
for col in categorical_feats:
    train4 = pd.concat([train[col],train4],axis = 1)
    test4 = pd.concat([test[col],test4],axis = 1)

- 平均精确度减少（mean decrease accuracy）????
- 打乱每个特征的特征值顺序,重要特征影响大

In [48]:
X = train.drop(["tradeMoney"],axis=1)
Y = train["tradeMoney"]
for col in X.columns:
    if X[col].dtype.name == "category":
        X = X.drop([col],axis=1)
X_columns = X.columns

In [49]:
from sklearn.model_selection import StratifiedShuffleSplit

from sklearn.metrics import r2_score
from collections import defaultdict

rf = RandomForestRegressor()
scores = defaultdict(list)


ss=StratifiedShuffleSplit(n_splits=5,test_size=0.25,train_size=0.75,random_state=0)

#crossvalidate the scores on a number of different random splits of the data
for train_idx, test_idx in ss:
    X_train, X_test = X[train_idx], X[test_idx]
    Y_train, Y_test = Y[train_idx], Y[test_idx]
    # 使用修改前的原始特征训练模型，其acc作为后续混洗特征值后的对比标准。r = rf.fit(X_train, Y_train)
    acc = r2_score(Y_test, rf.predict(X_test))
    # 遍历每一列特征
    for i in range(X.shape[1]):
        X_t = X_test.copy()
        # 对这一列特征进行混洗，交互了一列特征内部的值的顺序
        np.random.shuffle(X_t[:, i])
        shuff_acc = r2_score(Y_test, rf.predict(X_t))
        # 混洗某个特征值后，计算平均精确度减少程度。scores[names[i]].append((acc-shuff_acc)/acc)
print("Features sorted by their score:")
print(sorted([(round(np.mean(score), 4), feat) for feat, score in scores.items()], reverse=True))

TypeError: 'StratifiedShuffleSplit' object is not iterable

In [50]:
X = train.drop(["tradeMoney"],axis=1)
Y = train["tradeMoney"]
for col in X.columns:
    if X[col].dtype.name == "category":
        X = X.drop([col],axis=1)
X_columns = X.columns

In [51]:
from sklearn import metrics
from sklearn.ensemble import ExtraTreesClassifier
model = ExtraTreesClassifier() # build extra tree model
model.fit(X,Y)
print(model.feature_importances_) #display importance of each variables

[0.35074663 0.02863654 0.01546764 0.01136247 0.13102032 0.17109213
 0.04594781 0.00188385 0.00301867 0.00169321 0.00218434 0.00206114
 0.001946   0.00225897 0.00238007 0.0018788  0.00218806 0.00214295
 0.00230527 0.00226835 0.01223752 0.01197903 0.0127518  0.01211763
 0.00905312 0.00896493 0.00951614 0.00874973 0.01005827 0.00331097
 0.00138822 0.00177159 0.00102445 0.0011214  0.00114721 0.00139317
 0.00193534 0.00487644 0.00223805 0.01209941 0.01236304 0.02697524
 0.01266995 0.03555391 0.00222023]


In [52]:
sel_features = [f for f, s in zip(X_columns, model.feature_importances_) if abs(s)> 0.002 ] # 选择绝对值大于二的特征

train5 = train[sel_features]
test5 = test[sel_features]
for col in categorical_feats:
    train5 = pd.concat([train[col],train],axis = 1)
    test5 = pd.concat([test[col],test],axis = 1)

# 需要的数据
- train
- target
- test
- features
- categorical_feats

In [53]:

features = train.columns
target = train["tradeMoney"]

In [54]:
params = {
    'num_leaves': 31,
    'min_data_in_leaf': 20,
    'min_child_samples':20,
    'objective': 'regression',
    'learning_rate': 0.01,
    "boosting": "gbdt",
    "feature_fraction": 0.8,
    "bagging_freq": 1,
    "bagging_fraction": 0.85,
    "bagging_seed": 23,
    "metric": 'rmse',
    "lambda_l1": 0.2,
    "nthread": 4,
}

In [55]:
folds = KFold(n_splits=5, shuffle=True, random_state=2333)

oof_lgb = np.zeros(len(train))
predictions_lgb = np.zeros(len(test))
feature_importance_df = pd.DataFrame()

for fold_, (trn_idx, val_idx) in enumerate(folds.split(train.values, target.values)):
    print("fold {}".format(fold_))
    trn_data = lgb.Dataset(train.iloc[trn_idx], label=target.iloc[trn_idx])# categorical_feature=categorical_feats)
    val_data = lgb.Dataset(train.iloc[val_idx], label=target.iloc[val_idx])#, categorical_feature=categorical_feats)

    num_round = 10000
    clf = lgb.train(params, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval=500, early_stopping_rounds = 200)
    
    oof_lgb[val_idx] = clf.predict(train.iloc[val_idx], num_iteration=clf.best_iteration)
    
    fold_importance_df = pd.DataFrame()
    fold_importance_df["feature"] = features
    fold_importance_df["importance"] = clf.feature_importance()
    fold_importance_df["fold"] = fold_ + 1
    feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
    
    predictions_lgb += clf.predict(test, num_iteration=clf.best_iteration) / folds.n_splits
    
print("CV Score: {:<8.5f}".format(r2_score(target, oof_lgb)))

fold 0
Training until validation scores don't improve for 200 rounds.
[500]	training's rmse: 177.388	valid_1's rmse: 201.322
[1000]	training's rmse: 107.51	valid_1's rmse: 151.512
[1500]	training's rmse: 87.2385	valid_1's rmse: 138.434
[2000]	training's rmse: 75.4785	valid_1's rmse: 133.749
[2500]	training's rmse: 67.1982	valid_1's rmse: 131.114
[3000]	training's rmse: 60.5105	valid_1's rmse: 129.504
[3500]	training's rmse: 55.0544	valid_1's rmse: 128.475
[4000]	training's rmse: 50.3845	valid_1's rmse: 127.384
[4500]	training's rmse: 46.3412	valid_1's rmse: 126.724
[5000]	training's rmse: 42.6049	valid_1's rmse: 126.27
Early stopping, best iteration is:
[4865]	training's rmse: 43.5841	valid_1's rmse: 126.211
fold 1
Training until validation scores don't improve for 200 rounds.
[500]	training's rmse: 180.562	valid_1's rmse: 194.42
[1000]	training's rmse: 110	valid_1's rmse: 134.823
[1500]	training's rmse: 89.2787	valid_1's rmse: 119.476
[2000]	training's rmse: 78.3347	valid_1's rmse: 11

In [None]:
cols = (feature_importance_df[["feature", "importance"]]
        .groupby("feature")
        .mean()
        .sort_values(by="importance", ascending=False)[:1000].index)

best_features = feature_importance_df.loc[feature_importance_df.feature.isin(cols)]

plt.figure(figsize=(14,40))
sns.barplot(x="importance",
            y="feature",
            data=best_features.sort_values(by="importance",
                                           ascending=False))
plt.title('LightGBM Features (avg over folds)')
plt.tight_layout()

In [None]:
from sklearn.metrics import r2_score
def online_score(pred):
    print("预测结果最大值：{},预测结果最小值：{}".format(pred.max(),pred.min()))
    # a榜测分
    conmbine1 = pd.read_csv("G:/compete/ai/city_rent_precent/data_set/sub_a_913.csv",engine = "python")#,header=None)
    score1 = r2_score(pred, conmbine1)
    print(score1)
    return score1

In [None]:

online_score(predictions_lgb)