In [1]:
import pandas as pd
import numpy as np

In [2]:
train=pd.read_csv('train_NIR5Yl1.csv')

In [3]:
train.shape

(330045, 7)

In [4]:
train.head()

Unnamed: 0,ID,Tag,Reputation,Answers,Username,Views,Upvotes
0,52664,a,3942,2,155623,7855,42
1,327662,a,26046,12,21781,55801,1175
2,468453,c,1358,4,56177,8067,60
3,96996,a,264,3,168793,27064,9
4,131465,c,4271,4,112223,13986,83


### missing

In [5]:
np.sum(train.isnull())/train.shape[0]

ID            0
Tag           0
Reputation    0
Answers       0
Username      0
Views         0
Upvotes       0
dtype: float64

In [6]:
train.dtypes

ID              int64
Tag            object
Reputation    float64
Answers       float64
Username        int64
Views         float64
Upvotes       float64
dtype: object

### dependent variable

In [7]:
dv=train.Upvotes.values
dv[dv==0]=0.1
dv=np.log(dv)
np.min(dv)

-2.3025850929940455

### variables

In [8]:
pd.unique(train.Tag.values.ravel())

array(['a', 'c', 'r', 'j', 'p', 's', 'h', 'o', 'i', 'x'], dtype=object)

In [9]:
#train2=train.drop(['ID','Username','Upvotes'],axis=1)

In [10]:
train2=train.drop(['ID','Upvotes'],axis=1)

In [11]:
train2['Tag']=train2.Tag.astype('category').cat.codes

In [12]:
pd.unique(train2.Tag.values.ravel())

array([0, 1, 7, 4, 6, 8, 2, 5, 3, 9], dtype=int64)

In [13]:
train2.head()

Unnamed: 0,Tag,Reputation,Answers,Username,Views
0,0,3942,2,155623,7855
1,0,26046,12,21781,55801
2,1,1358,4,56177,8067
3,0,264,3,168793,27064
4,1,4271,4,112223,13986


from sklearn.cross_validation import train_test_split

devi=train_test_split(train2.index,test_size=0.3,random_state=1615)[0]
vali=train_test_split(train2.index,test_size=0.3,random_state=1615)[1]

dev=train2.iloc[devi,:]
val=train2.iloc[vali,:]

devdv=dv[devi]
valdv=dv[vali]

from sklearn.linear_model import LinearRegression

model = LinearRegression(n_jobs=4)

LR=model.fit(dev.values,devdv)

LR.score(val,valdv)

pred=LR.predict(val.values)

from sklearn.metrics import mean_squared_error

np.sqrt(mean_squared_error(valdv,pred))

from sklearn.ensemble import RandomForestRegressor

model2 = RandomForestRegressor(n_jobs=1,random_state=2551,verbose=0)

from sklearn.grid_search import GridSearchCV

GS = GridSearchCV(model2,{'min_samples_leaf':[100,200],'max_depth':[5,7,9],'max_features':[0.4,0.6,0.8],'n_estimators':[500,700,900]},n_jobs=4,cv=5,verbose=100)

RF=model2.fit(dev.values,devdv)

RF.score(val.values,valdv)

GS.fit(dev.values,devdv)

print(GS.best_params_)
print(GS.best_score_)

GS2 = GridSearchCV(model2,{'min_samples_leaf':[100],'max_depth':[9,11,13],'max_features':[0.7,0.8,0.9],'n_estimators':[600,700,800]},n_jobs=4,cv=5,verbose=10000)

GS2.fit(dev.values,devdv)

print(GS2.best_params_)
print(GS2.best_score_)

model2 = RandomForestRegressor(n_estimators=700,max_depth=11,min_samples_leaf=100,max_features=0.8,n_jobs=4,random_state=2551,verbose=10000)

RF=model2.fit(dev.values,devdv)

RF.score(val.values,valdv)

pred=RF.predict(val.values)

np.sqrt(mean_squared_error(valdv,pred))

test=pd.read_csv('test_8i3B3FC.csv')

test.columns

pd.unique(test.Tag.values.ravel())

test2=test.drop(['ID','Username'],axis=1)

pd.Categorical.from_codes(codes=pd.unique(train2.Tag.values.ravel()),categories=pd.unique(train.Tag.values.ravel()))

test2['Tag']=test2.Tag.astype('category').cat.codes

pd.unique(test2.Tag.values.ravel())

pred=RF.predict(test2.values)

pred=LR.predict(test2.values)

pred=np.round(pred)
sub1=pd.concat([test,pd.Series(pred)],axis=1)
sub2=pd.DataFrame({'ID':test.ID.values,'Upvotes':pred})
sub2.to_csv('submission2.csv',index=False)

### variables creation

##### questions, answers, views per user

In [14]:
peruser=train2.groupby('Username').apply(lambda x: pd.Series({'quesperuser':np.count_nonzero(x),
                                                              'ansperuser':np.sum(x.Answers),
                                                              'viewsperuser':np.sum(x.Views),
                                                              'meanansperuser':np.mean(x.Answers),
                                                              'meanviewsperuser':np.mean(x.Views)}))

In [15]:
train2=train2.merge(right=peruser,how="outer",left_on=['Username'],right_index=True)

##### answers, views as % of total, mean answers, views per user

In [16]:
train2=train2.assign(ansperperuser=np.where(train2.ansperuser.values==0,0,train2.Answers/train2.ansperuser),
              viewsperperuser=train2.Views/train2.viewsperuser,
              anspermeanperuser=np.where(train2.meanansperuser.values==0,0,train2.Answers/train2.meanansperuser),
              viewspermeanperuser=train2.Views/train2.meanviewsperuser)

##### tag-wise total questions, answers, views and mean answers, views

In [17]:
pertag=train2.groupby('Tag').apply(lambda x: pd.Series({'quespertag':np.count_nonzero(x),
                                                        'anspertag':np.sum(x.Answers),
                                                        'viewspertag':np.sum(x.Views),
                                                        'meananspertag':np.mean(x.Answers),
                                                        'meanviewspertag':np.mean(x.Views)}))

In [18]:
train2=train2.merge(right=pertag,how="outer",left_on=['Tag'],right_index=True)

##### answers, views as % of tag-wise total, mean answers, views

In [19]:
train2=train2.assign(ansperpertag=train2.Answers/train2.anspertag,
                     viewsperpertag=train2.Views/train2.viewspertag,
                     anspermeanpertag=train2.Answers/train2.meananspertag,
                     viewspermeanpertag=train2.Views/train2.meanviewspertag)

##### tag-wise mean questions, answers, views per user

In [20]:
peruserpertag=train2.loc[~train2.duplicated(['Tag','Username','quesperuser','ansperuser','viewsperuser']),
                         ['Tag','Username','quesperuser','ansperuser','viewsperuser']].groupby('Tag').apply(lambda x: pd.Series({
    'meanquesperuserpertag':np.mean(x.quesperuser),
    'meanansperuserpertag':np.mean(x.ansperuser),
    'meanviewsperuserpertag':np.mean(x.viewsperuser)}))

In [21]:
train2=train2.merge(right=peruserpertag,how="outer",left_on=['Tag'],right_index=True)

##### answers, views as % of tag-wise mean answers, views per user

In [22]:
train2=train2.assign(anspermeanperuserpertag=train2.Answers/train2.meanansperuserpertag,
                     viewspermeanperuserpertag=train2.Views/train2.meanviewsperuserpertag)

##### questions, answers, views per user as % of tag-wise mean questions, answers, views per user

In [23]:
train2=train2.assign(ansperuserpermeanperuserpertag=train2.ansperuser/train2.meanansperuserpertag,
                     viewsperuserpermeanperuserpertag=train2.viewsperuser/train2.meanviewsperuserpertag)

#### reputation bins

In [24]:
cuts=train2.Reputation.quantile([i/10 for i in range(11)])

In [25]:
train2=train2.assign(repbin=pd.cut(train2.Reputation,bins=cuts,labels=[i for i in range(10)],include_lowest=True).cat.codes)

##### reputation bin-wise total questions, answers, views and mean answers, views

In [26]:
perrepbin=train2.groupby('repbin').apply(lambda x: pd.Series({'quesperrepbin':np.count_nonzero(x),
                                                        'ansperrepbin':np.sum(x.Answers),
                                                        'viewsperrepbin':np.sum(x.Views),
                                                        'meanansperrepbin':np.mean(x.Answers),
                                                        'meanviewsperrepbin':np.mean(x.Views)}))

In [27]:
train2=train2.merge(right=perrepbin,how="outer",left_on=['repbin'],right_index=True)

##### answers, views as % of reputation bin-wise total, mean answers, views

In [28]:
train2=train2.assign(ansperperrepbin=train2.Answers/train2.ansperrepbin,
                     viewsperperrepbin=train2.Views/train2.viewsperrepbin,
                     anspermeanperrepbin=train2.Answers/train2.meanansperrepbin,
                     viewspermeanperrepbin=train2.Views/train2.meanviewsperrepbin)

##### reputation bin-wise mean questions, answers, views per user

In [29]:
peruserperrepbin=train2.loc[~train2.duplicated(['repbin','Username','quesperuser','ansperuser','viewsperuser']),
                         ['repbin','Username','quesperuser','ansperuser','viewsperuser']].groupby('repbin').apply(lambda x: pd.Series({
    'meanquesperuserperrepbin':np.mean(x.quesperuser),
    'meanansperuserperrepbin':np.mean(x.ansperuser),
    'meanviewsperuserperrepbin':np.mean(x.viewsperuser)}))

In [30]:
train2=train2.merge(right=peruserperrepbin,how="outer",left_on=['repbin'],right_index=True)

##### answers, views as % of reputation bin-wise mean answers, views per user

In [31]:
train2=train2.assign(anspermeanperuserperrepbin=train2.Answers/train2.meanansperuserperrepbin,
                     viewspermeanperuserperrepbin=train2.Views/train2.meanviewsperuserperrepbin)

##### questions, answers, views per user as % of reputation bin-wise mean questions, answers, views per user

In [32]:
train2=train2.assign(ansperuserpermeanperuserperrepbin=train2.ansperuser/train2.meanansperuserperrepbin,
                     viewsperuserpermeanperuserperrepbin=train2.viewsperuser/train2.meanviewsperuserperrepbin)

In [33]:
train2=train2.drop(['Tag','Username','repbin'],axis=1)

In [34]:
from sklearn.cross_validation import train_test_split

In [35]:
devi=train_test_split(train2.index,test_size=0.3,random_state=1615)[0]
vali=train_test_split(train2.index,test_size=0.3,random_state=1615)[1]

In [36]:
dev=train2.iloc[devi,:]
val=train2.iloc[vali,:]

In [37]:
devdv=dv[devi]
valdv=dv[vali]

In [38]:
from sklearn.ensemble import RandomForestRegressor

In [40]:
RF=model2.fit(dev.values,devdv)

building tree 1 of 75building tree 2 of 75
building tree 3 of 75
building tree 4 of 75

building tree 5 of 75
building tree 6 of 75
building tree 7 of 75[Parallel(n_jobs=6)]: Done   6 out of  75 | elapsed:    3.5s remaining:   40.9s

building tree 8 of 75[Parallel(n_jobs=6)]: Done   4 out of  75 | elapsed:    3.5s remaining:  1.1min

building tree 9 of 75[Parallel(n_jobs=6)]: Done   1 out of  75 | elapsed:    3.5s remaining:  4.5min

building tree 10 of 75[Parallel(n_jobs=6)]: Done   3 out of  75 | elapsed:    3.6s remaining:  1.5min

building tree 11 of 75[Parallel(n_jobs=6)]: Done   2 out of  75 | elapsed:    3.6s remaining:  2.3min

building tree 12 of 75[Parallel(n_jobs=6)]: Done   5 out of  75 | elapsed:    3.7s remaining:   52.9s

building tree 13 of 75[Parallel(n_jobs=6)]: Done   9 out of  75 | elapsed:    7.1s remaining:   52.6s

building tree 14 of 75[Parallel(n_jobs=6)]: Done   8 out of  75 | elapsed:    7.2s remaining:  1.0min

building tree 15 of 75[Parallel(n_jobs=6)]: Don

In [41]:
RF.score(val.values,valdv)

[Parallel(n_jobs=6)]: Done   1 out of  75 | elapsed:    0.0s remaining:    2.9s
[Parallel(n_jobs=6)]: Done   3 out of  75 | elapsed:    0.0s remaining:    0.9s
[Parallel(n_jobs=6)]: Done   2 out of  75 | elapsed:    0.0s remaining:    1.4s
[Parallel(n_jobs=6)]: Done   4 out of  75 | elapsed:    0.0s remaining:    0.6s
[Parallel(n_jobs=6)]: Done   5 out of  75 | elapsed:    0.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done   6 out of  75 | elapsed:    0.0s remaining:    0.4s
[Parallel(n_jobs=6)]: Done   8 out of  75 | elapsed:    0.0s remaining:    0.2s
[Parallel(n_jobs=6)]: Done   9 out of  75 | elapsed:    0.0s remaining:    0.2s
[Parallel(n_jobs=6)]: Done  11 out of  75 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=6)]: Done  10 out of  75 | elapsed:    0.0s remaining:    0.2s
[Parallel(n_jobs=6)]: Done  12 out of  75 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=6)]: Done  13 out of  75 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=6)]: Done  14 out of  7

-0.00010541535192287732

In [42]:
pred=RF.predict(val.values)

[Parallel(n_jobs=6)]: Done   3 out of  75 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=6)]: Done   4 out of  75 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=6)]: Done   2 out of  75 | elapsed:    0.0s remaining:    0.2s
[Parallel(n_jobs=6)]: Done   1 out of  75 | elapsed:    0.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done   6 out of  75 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=6)]: Done   9 out of  75 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=6)]: Done   8 out of  75 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=6)]: Done  10 out of  75 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=6)]: Done   7 out of  75 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=6)]: Done  11 out of  75 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=6)]: Done   5 out of  75 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=6)]: Done  15 out of  75 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=6)]: Done  12 out of  7

In [44]:
from sklearn.metrics import mean_squared_error

In [45]:
np.sqrt(mean_squared_error(valdv,pred))

2.0595551714875668

In [58]:
test=pd.read_csv('test_8i3B3FC.csv')

In [59]:
test.columns

Index(['ID', 'Tag', 'Reputation', 'Answers', 'Username', 'Views'], dtype='object')

In [60]:
pd.unique(test.Tag.values.ravel())

array(['a', 'c', 'i', 'x', 's', 'p', 'o', 'j', 'h', 'r'], dtype=object)

In [67]:
test2=test.drop('ID',axis=1)

In [38]:
#pd.Categorical.from_codes(codes=pd.unique(train2.Tag.values.ravel()),categories=pd.unique(train.Tag.values.ravel()))

In [68]:
test2['Tag']=test2.Tag.astype('category').cat.codes

In [69]:
pd.unique(test2.Tag.values.ravel())

array([0, 1, 3, 9, 8, 6, 5, 4, 2, 7], dtype=int64)

In [97]:
pred=RF.predict(test2.values)

[Parallel(n_jobs=6)]: Done   1 out of 177 | elapsed:    0.0s remaining:    2.0s
[Parallel(n_jobs=6)]: Done   3 out of 177 | elapsed:    0.0s remaining:    0.8s
[Parallel(n_jobs=6)]: Done   6 out of 305 | elapsed:    0.0s remaining:    0.9s
[Parallel(n_jobs=6)]: Done   4 out of 305 | elapsed:    0.0s remaining:    1.4s
[Parallel(n_jobs=6)]: Done   5 out of 305 | elapsed:    0.0s remaining:    1.3s
[Parallel(n_jobs=6)]: Done   2 out of 305 | elapsed:    0.0s remaining:    3.5s
[Parallel(n_jobs=6)]: Done   9 out of 305 | elapsed:    0.0s remaining:    0.8s
[Parallel(n_jobs=6)]: Done  10 out of 305 | elapsed:    0.0s remaining:    0.8s
[Parallel(n_jobs=6)]: Done   7 out of 305 | elapsed:    0.0s remaining:    1.4s
[Parallel(n_jobs=6)]: Done   8 out of 305 | elapsed:    0.0s remaining:    1.2s
[Parallel(n_jobs=6)]: Done  12 out of 305 | elapsed:    0.0s remaining:    0.8s
[Parallel(n_jobs=6)]: Done  11 out of 305 | elapsed:    0.0s remaining:    0.9s
[Parallel(n_jobs=6)]: Done  14 out of 30

[Parallel(n_jobs=6)]: Done 203 out of 800 | elapsed:    0.4s remaining:    1.2s
[Parallel(n_jobs=6)]: Done 200 out of 800 | elapsed:    0.4s remaining:    1.3s
[Parallel(n_jobs=6)]: Done 206 out of 800 | elapsed:    0.4s remaining:    1.2s
[Parallel(n_jobs=6)]: Done 205 out of 800 | elapsed:    0.4s remaining:    1.2s
[Parallel(n_jobs=6)]: Done 208 out of 800 | elapsed:    0.4s remaining:    1.2s
[Parallel(n_jobs=6)]: Done 204 out of 800 | elapsed:    0.4s remaining:    1.2s
[Parallel(n_jobs=6)]: Done 210 out of 800 | elapsed:    0.4s remaining:    1.2s
[Parallel(n_jobs=6)]: Done 211 out of 800 | elapsed:    0.4s remaining:    1.2s
[Parallel(n_jobs=6)]: Done 213 out of 800 | elapsed:    0.4s remaining:    1.1s
[Parallel(n_jobs=6)]: Done 212 out of 800 | elapsed:    0.4s remaining:    1.2s
[Parallel(n_jobs=6)]: Done 207 out of 800 | elapsed:    0.4s remaining:    1.2s
[Parallel(n_jobs=6)]: Done 215 out of 800 | elapsed:    0.4s remaining:    1.1s
[Parallel(n_jobs=6)]: Done 216 out of 80

[Parallel(n_jobs=6)]: Done 407 out of 800 | elapsed:    0.8s remaining:    0.7s
[Parallel(n_jobs=6)]: Done 409 out of 800 | elapsed:    0.8s remaining:    0.7s
[Parallel(n_jobs=6)]: Done 410 out of 800 | elapsed:    0.8s remaining:    0.7s
[Parallel(n_jobs=6)]: Done 411 out of 800 | elapsed:    0.8s remaining:    0.7s
[Parallel(n_jobs=6)]: Done 412 out of 800 | elapsed:    0.8s remaining:    0.7s
[Parallel(n_jobs=6)]: Done 413 out of 800 | elapsed:    0.8s remaining:    0.7s
[Parallel(n_jobs=6)]: Done 404 out of 800 | elapsed:    0.8s remaining:    0.8s
[Parallel(n_jobs=6)]: Done 414 out of 800 | elapsed:    0.8s remaining:    0.7s
[Parallel(n_jobs=6)]: Done 416 out of 800 | elapsed:    0.8s remaining:    0.7s
[Parallel(n_jobs=6)]: Done 417 out of 800 | elapsed:    0.8s remaining:    0.7s
[Parallel(n_jobs=6)]: Done 418 out of 800 | elapsed:    0.8s remaining:    0.7s
[Parallel(n_jobs=6)]: Done 419 out of 800 | elapsed:    0.8s remaining:    0.7s
[Parallel(n_jobs=6)]: Done 420 out of 80

[Parallel(n_jobs=6)]: Done 512 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 515 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 516 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 513 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 517 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 518 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 520 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 519 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 521 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 522 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 523 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 525 out of 800 | elapsed:    1.0s remaining:    0.5s
[Parallel(n_jobs=6)]: Done 526 out of 80

[Parallel(n_jobs=6)]: Done 622 out of 800 | elapsed:    1.2s remaining:    0.3s
[Parallel(n_jobs=6)]: Done 605 out of 800 | elapsed:    1.2s remaining:    0.3s
[Parallel(n_jobs=6)]: Done 615 out of 800 | elapsed:    1.2s remaining:    0.3s
[Parallel(n_jobs=6)]: Done 626 out of 800 | elapsed:    1.2s remaining:    0.3s
[Parallel(n_jobs=6)]: Done 628 out of 800 | elapsed:    1.2s remaining:    0.3s
[Parallel(n_jobs=6)]: Done 627 out of 800 | elapsed:    1.2s remaining:    0.3s
[Parallel(n_jobs=6)]: Done 629 out of 800 | elapsed:    1.2s remaining:    0.3s
[Parallel(n_jobs=6)]: Done 630 out of 800 | elapsed:    1.2s remaining:    0.3s
[Parallel(n_jobs=6)]: Done 631 out of 800 | elapsed:    1.2s remaining:    0.2s
[Parallel(n_jobs=6)]: Done 633 out of 800 | elapsed:    1.2s remaining:    0.2s
[Parallel(n_jobs=6)]: Done 634 out of 800 | elapsed:    1.2s remaining:    0.3s
[Parallel(n_jobs=6)]: Done 624 out of 800 | elapsed:    1.2s remaining:    0.3s
[Parallel(n_jobs=6)]: Done 623 out of 80

[Parallel(n_jobs=6)]: Done 728 out of 800 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done 730 out of 800 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done 731 out of 800 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done 732 out of 800 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done 716 out of 800 | elapsed:    1.4s remaining:    0.1s
[Parallel(n_jobs=6)]: Done 735 out of 800 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done 729 out of 800 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done 737 out of 800 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done 738 out of 800 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done 739 out of 800 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done 740 out of 800 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done 741 out of 800 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=6)]: Done 743 out of 80

In [98]:
pred=round(np.exp(pred))
#sub1=pd.concat([test,pd.Series(pred)],axis=1)
sub4=pd.DataFrame({'ID':test.ID.values,'Upvotes':pred})
sub4.to_csv('submission4.csv',index=False)