In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the data: training set, test set and the sample submission file (read in that order).
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/bike-sharing-demand/train.csv",
        "/kaggle/input/bike-sharing-demand/test.csv",
        "/kaggle/input/bike-sharing-demand/sampleSubmission.csv",
    )
)

In [None]:
# Column names grouped by kind: numeric columns (features and targets) of the
# training file, followed by the single non-numeric column.
numerical_list = [
    'season',
    'holiday',
    'workingday',
    'weather',
    'temp',
    'atemp',
    'humidity',
    'windspeed',
    'casual',
    'registered',
    'count',
]
object_list = ["datetime"]

In [None]:
# Date and time are expected to influence rentals, so year, month, day, hour,
# quarter, day of year, day of week and week of year are added as features.
# That gives 8 extra features on top of the original 8, i.e. 16 features in total.

# Process train and test in lockstep so both end up with the same features.
combine_list = [train, test]
for combine in combine_list:
    parsed_stamps = pd.to_datetime(combine["datetime"])
    combine["datetime"] = parsed_stamps
   

In [None]:
import calendar

# Derive calendar features from the already-parsed "datetime" column of every
# frame in combine_list (train and test get identical columns, in this order).
#
# The vectorized `.dt` accessor is used instead of building each column with a
# Python-level list comprehension over individual Timestamp objects: it yields
# the same values without a per-row interpreter loop.
for combine in combine_list:
    stamps = combine["datetime"].dt
    # Insertion order of this dict fixes the column order of the new features,
    # which must be the same for train and test because the model consumes
    # the columns positionally.
    calendar_features = {
        "month": stamps.month,
        "year": stamps.year,
        "day": stamps.day,
        "hour": stamps.hour,
        "day_of_year": stamps.dayofyear,
        "dayofweek": stamps.dayofweek,
        "quarter": stamps.quarter,
        # ISO week number; equivalent to Timestamp.weekofyear.
        "weekofyear": stamps.isocalendar().week,
    }
    for feature_name, values in calendar_features.items():
        # Cast to int64 so the dtype matches what a list of Python ints produced
        # (the accessor may return int32 / nullable UInt32 depending on pandas version).
        combine[feature_name] = values.astype("int64")

In [None]:
# Move "datetime" out of the columns and into the index of both frames (in place),
# so that only feature/target columns remain as regular columns.
combine_list = [train, test]
for combine in combine_list:
    combine.set_index(keys="datetime", inplace=True)

In [None]:
# 数据可视化处理
import matplotlib.pyplot as plt
import seaborn as sns

# 2x2 grid of box plots of the rental count: overall, by season, by hour and
# by working day.
fig, axis = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
(top_left, top_right), (bottom_left, bottom_right) = axis

# (target axes, column used for the x grouping); None draws a single box.
panel_groupings = [
    (top_left, None),
    (top_right, "season"),
    (bottom_left, "hour"),
    (bottom_right, "workingday"),
]
for panel, grouping in panel_groupings:
    sns.boxplot(data=train, x=grouping, y="count", orient="v", ax=panel)

# English labels are used because the plotting font lacks the glyphs for
# Chinese labels, which would render as empty boxes.
top_left.set(ylabel="count", title="box plot on count")
top_right.set(xlabel="season", ylabel="count", title="box plot on count across season")
bottom_left.set(xlabel="hours", ylabel="count", title="box plot on count across hours")
bottom_right.set(xlabel="working day", ylabel="count", title="box plot on count across working day")

In [None]:
# Correlation analysis.
# Pearson correlation between the rental count and the 12 features that were
# not already covered by the box plots.
corr_columns = [
    "temp", "atemp", "humidity", "windspeed",
    "month", "year", "day", "hour",
    "day_of_year", "dayofweek", "quarter", "weekofyear",
    "count",
]
corrmat = train[corr_columns].corr()

# Explicit boolean mask hiding the strict upper triangle (True = hidden), so the
# lower triangle and the diagonal stay visible. Passing the float correlation
# values themselves as the mask relies on truthiness and would leave any
# upper-triangle cell whose correlation is exactly 0.0 unmasked.
m = np.triu(np.ones(corrmat.shape, dtype=bool), k=1)

fig, ax = plt.subplots(figsize=(20, 10))
# Draw on the axes created above instead of relying on the implicit current axes.
sns.heatmap(corrmat, mask=m, vmax=0.8, square=True, annot=True, ax=ax)

In [None]:
# 搭建ANN模型
from sklearn.preprocessing import StandardScaler
import tensorflow.keras
from keras.layers import Dense,Dropout
from keras.models import Sequential
from sklearn.model_selection import train_test_split

def ANN_model_for_regression(input_dim=16):
    """Build and compile a fully connected network for single-target regression.

    The network stacks four ReLU hidden layers (64, 128, 128, 64 units) followed
    by a single-unit output layer with no activation specified.

    Args:
        input_dim: Number of input features per sample. Defaults to 16, the
            feature count this notebook trains on.

    Returns:
        A Sequential model compiled with mean-squared-error loss and the
        'adam' optimizer.
    """
    model = Sequential()
    # Only the first layer needs the input size; each later layer infers its
    # input size from the previous layer's output.
    model.add(Dense(64, input_dim=input_dim, activation='relu'))  # input + first hidden layer
    model.add(Dense(128, activation='relu'))  # hidden layer
    model.add(Dense(128, activation='relu'))  # hidden layer
    model.add(Dense(64, activation='relu'))  # hidden layer
    model.add(Dense(1, kernel_initializer='normal'))  # output layer

    # Mean squared error is used as the training loss.
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [None]:
# Data preparation: drop the two partial rental counts from train (in place),
# then split it into the target ("count") and the remaining feature columns.
train.drop(columns=["casual", "registered"], inplace=True)
y_train = train.loc[:, ["count"]]
x_train = train.drop(columns=["count"])

In [None]:
# Start training: instantiate the model, then fit it for 150 epochs while
# holding out 25% of the training data for validation.
model = ANN_model_for_regression()
history = model.fit(
    x=x_train,
    y=y_train,
    validation_split=0.25,
    epochs=150,
)

In [None]:
# Visualize the training-loss history recorded by model.fit, one point per epoch.
loss_fig, loss_ax = plt.subplots(figsize=(8, 5))
loss_ax.plot(history.history["loss"])
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("loss")

In [None]:
# Use the trained model to predict the rental count for every row of the test set.
y_pred = model.predict(x=test)

In [None]:
# Display the raw array returned by model.predict as the cell output.
y_pred

In [None]:
# Attach the predictions to the test frame as its "count" column.
test["count"] = y_pred
# Turn the "datetime" index back into a regular column so it can be exported.
test.reset_index(inplace=True)
# Predictions at or below zero are not valid counts; replace them with 0.
non_positive_rows = test["count"] <= 0
test.loc[non_positive_rows, "count"] = 0
# Write only the two columns used for the submission file.
submission_columns = ['datetime', 'count']
test[submission_columns].to_csv('submission.csv', index=False)

In [None]:
# Display the test frame, which at this point also carries the predicted "count" column.
test