In [None]:
# 我们着重讨论不同区域的显著性影响因素
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the cleaned dataset into `data` and show summary statistics.
# NOTE(review): this is an absolute path on the author's machine; the cell
# fails anywhere else until the path is adjusted.
csv_path = "C:/Users/86187/Desktop/新加坡国立大学暑期学校相关文件/data_cleaned.csv"
data = pd.read_csv(csv_path)
data.describe()

In [None]:
# Display the (rows, columns) dimensions of the loaded table.
data.shape

In [None]:
# Derived columns used by the per-district analysis further down.

# totalRoom: element-wise sum of the four per-room-type columns.
data['totalRoom'] = (
    data['livingRoom']
    .add(data['drawingRoom'])
    .add(data['kitchen'])
    .add(data['bathRoom'])
)

# houseAge: floor(tradeTimeModefiy / 365) minus constructionTime.
# NOTE(review): presumably tradeTimeModefiy is a day count and constructionTime
# a year, so this yields an age in years at trade time - confirm against the
# cleaning step that produced these columns.
data['houseAge'] = data['tradeTimeModefiy'].floordiv(365).sub(data['constructionTime'])

# houseCondition: plain sum of the renovationCondition, subway and elevator columns.
data['houseCondition'] = (
    data['renovationCondition']
    .add(data['subway'])
    .add(data['elevator'])
)

In [None]:
import folium

# Base map centred on Beijing with a single labelled marker at the centre.
beijing_center = [39.906217, 116.3912757]
m = folium.Map(location=beijing_center, zoom_start=13)
folium.Marker(beijing_center, popup='Beijing').add_to(m)
display(m)

In [None]:
# Count the rows in each district and draw the counts as a bar chart.
district_names = [
    'Dongcheng', 'Fengtai', 'Tongzhou', 'Daxing', 'Fangshan', 'Changping',
    'Chaoyang', 'Haidian', 'Shijingshan', 'Xicheng', 'Pinggu', 'Mentougou',
    'Shunyi',
]
district_counts = data.groupby('district').size()

ax = district_counts.plot(kind='bar')
ax.set_xlabel('District')
ax.set_ylabel('Number of Properties')
ax.set_title('Number of Properties by District')
# NOTE(review): the labels are applied by position, so this assumes the sorted
# district codes line up one-to-one with the 13 names above - confirm.
plt.xticks(range(len(district_counts)), district_names, rotation=45)
plt.show()

In [None]:
# Per-district study of the factors related to attractiveness.
# This section works on district code 9 (Shijingshan).
from folium.plugins import MarkerCluster

# .copy() makes the subset an independent frame: later cells assign new
# columns into it, which on a bare boolean-filter slice raises
# SettingWithCopyWarning.
Shijingshan_data = data[data['district'] == 9].copy()

# Centre the map on the first listing of the subset.
map_center = [Shijingshan_data.iloc[0]['Lat'], Shijingshan_data.iloc[0]['Lng']]
m = folium.Map(location=map_center, zoom_start=12)

# Markers go into a MarkerCluster rather than straight onto the map, so
# nearby points are grouped.
marker_cluster = MarkerCluster().add_to(m)
# Iterate the two coordinate columns directly instead of building a Series
# per row with iterrows().
for lat, lng in zip(Shijingshan_data['Lat'], Shijingshan_data['Lng']):
    folium.Marker([lat, lng]).add_to(marker_cluster)
display(m)

In [None]:
from folium.plugins import HeatMap

# Heat map of how attractiveness is distributed geographically within the
# Shijingshan subset (district code 9).
# The map centre is the first listing, shifted by -0.03 degrees of latitude.
map_center = [Shijingshan_data.iloc[0]['Lat']-0.03, Shijingshan_data.iloc[0]['Lng']]
m = folium.Map(location=map_center, zoom_start=12)

# Each heat point is [lat, lng, weight]; the weight is the listing's
# `attractiveness` value (not its price).
heat_data = Shijingshan_data[['Lat', 'Lng', 'attractiveness']].values.tolist()
HeatMap(heat_data, blur=4, radius=8).add_to(m)
m

In [None]:
# Look for factors related to attractiveness within the Shijingshan subset
# (district code 9).
import seaborn as sns

# Scatter plot of ladderRatio against attractiveness.
# The title names Shijingshan because that is the district the frame holds;
# it previously said Dongcheng.
sns.scatterplot(data=Shijingshan_data, x='ladderRatio', y='attractiveness')
plt.title('Ladder Ratio vs Attractiveness in Shijingshan')
plt.xlabel('Ladder Ratio')
plt.ylabel('Attractiveness')
plt.show()

In [None]:
# Pairwise correlation (pandas default method) between ladderRatio and
# attractiveness over the whole district subset.
corr_columns = ['ladderRatio', 'attractiveness']
correlation_matrix = Shijingshan_data[corr_columns].corr()
print(correlation_matrix)
# Author's reading: judged by the correlation coefficient, ladderRatio is in
# general not a significant driver of attractiveness in this district.
# NOTE(review): the original note named Dongcheng, but the frame is the
# district-9 (Shijingshan) subset.

In [None]:
# Split the Shijingshan subset by the `floor` column: above 10 counts as
# high floor, 10 or below as low floor. Rows with a missing `floor` fall
# into neither group.
high_floor = Shijingshan_data[Shijingshan_data['floor'] > 10]
low_floor = Shijingshan_data[Shijingshan_data['floor'] <= 10]

# Share of high-floor listings among all listings of the subset; guarded so
# an empty subset reports 0 instead of raising ZeroDivisionError.
total_listings = len(Shijingshan_data)
high_floor_ratio = len(high_floor) / total_listings * 100 if total_listings else 0.0

# The message names 石景山区 (Shijingshan), the district actually analysed;
# it previously said 东城区.
print("石景山区高层建筑共{high_floor}个，低层建筑共{low_floor}个,高层建筑占比为{high_floor_ratio:.2f}%".format(high_floor=len(high_floor), low_floor=len(low_floor), high_floor_ratio=high_floor_ratio))

In [None]:
# Pie chart of the high-floor / low-floor split computed from `floor`.
# The title names the district actually analysed and says what is being split
# (floor height); the `buildingType` column has its own pie chart later.
plt.figure(figsize=(8, 8))
plt.pie([len(high_floor), len(low_floor)], labels=['High Floor', 'Low Floor'], autopct='%1.1f%%', startangle=140)
plt.title('Shijingshan High-Floor vs Low-Floor Distribution')
plt.show()

In [None]:
# Pairwise correlation between floor and attractiveness over the whole
# district subset.
corr_columns = ['floor', 'attractiveness']
correlation_matrix = Shijingshan_data[corr_columns].corr()
print(correlation_matrix)

In [None]:
# Scatter plot of floor against attractiveness for the Shijingshan subset
# (district code 9).
# Labels fixed: "Attractiveness" was misspelled and the title named Dongcheng.
sns.scatterplot(data=Shijingshan_data, x='floor', y='attractiveness')
plt.title('Floor vs Attractiveness in Shijingshan')
plt.xlabel('Floor')
plt.ylabel('Attractiveness')
plt.show()

In [None]:
# Author's note: across the district as a whole, floor shows no association
# with how popular a listing is.
# Next: correlation between ladderRatio and attractiveness restricted to the
# high-floor group.
corr_columns = ['ladderRatio', 'attractiveness']
correlation_matrix = high_floor[corr_columns].corr()
print(correlation_matrix)

In [None]:
# Same correlation as for the high-floor group, now on the low-floor group.
corr_columns = ['ladderRatio', 'attractiveness']
correlation_matrix = low_floor[corr_columns].corr()
print(correlation_matrix)
# Author's reading: for low-floor buildings the link between ladderRatio and
# attractiveness is weaker than for high-floor buildings; buyers may weigh
# the ladder ratio for high-floor buildings, but overall it is not significant.

In [None]:
# Scatter plot of communityAverage against attractiveness for the Shijingshan
# subset (district code 9).
# Labels fixed: "Attractiveness" was misspelled and the title named Dongcheng.
sns.scatterplot(data=Shijingshan_data, x='communityAverage', y='attractiveness')
plt.title('CommunityAverage vs Attractiveness in Shijingshan')
plt.xlabel('CommunityAverage')
plt.ylabel('Attractiveness')
plt.show()

In [None]:
# Strength of association between communityAverage and attractiveness.
corr_columns = ['communityAverage', 'attractiveness']
correlation_matrix = Shijingshan_data[corr_columns].corr()
print(correlation_matrix)

In [None]:
# Scatter plot of totalRoom against attractiveness for the Shijingshan subset
# (district code 9).
# Labels fixed: "Attractiveness" was misspelled and the title named Dongcheng.
sns.scatterplot(data=Shijingshan_data, x='totalRoom', y='attractiveness')
plt.title('TotalRoom vs Attractiveness in Shijingshan')
plt.xlabel('TotalRoom')
plt.ylabel('Attractiveness')
plt.show()

In [None]:
# Strength of association between totalRoom and attractiveness.
corr_columns = ['totalRoom', 'attractiveness']
correlation_matrix = Shijingshan_data[corr_columns].corr()
print(correlation_matrix)

In [None]:
# Strength of association between totalRoom and attractiveness.
# NOTE(review): this cell computes exactly the same totalRoom/attractiveness
# correlation as the cell directly before it - consider deleting one.
corr_columns = ['totalRoom', 'attractiveness']
correlation_matrix = Shijingshan_data[corr_columns].corr()
print(correlation_matrix)

In [None]:
# Sum each room-type column over the Shijingshan subset (district code 9).
total_livingRoom = Shijingshan_data['livingRoom'].sum()
total_drawingRoom = Shijingshan_data['drawingRoom'].sum()
total_kitchen = Shijingshan_data['kitchen'].sum()
total_bathroom = Shijingshan_data['bathRoom'].sum()

# Collect the four totals into one labelled Series so they can be plotted
# together.
room_totals = pd.Series({
    'Living Room': total_livingRoom,
    'Drawing Room': total_drawingRoom,
    'Kitchen': total_kitchen,
    'Bathroom': total_bathroom
})

# Pie chart of each room type's share of all rooms. The title names the
# district actually analysed; it previously said Dongcheng.
room_totals.plot(kind='pie', autopct='%1.1f%%', startangle=140)
plt.title('Room Type Distribution in Shijingshan')
plt.ylabel('')  # drop the default y-axis label on the pie
plt.show()

In [None]:
# General view: correlation of each room-type count with attractiveness over
# the whole district subset.
corr_columns = ['bathRoom', 'kitchen', 'drawingRoom', 'livingRoom', 'attractiveness']
correlation_matrix = Shijingshan_data[corr_columns].corr()
print(correlation_matrix)

In [None]:
# "Big family" listings: livingRoom of 3 or more.
is_big_family = Shijingshan_data['livingRoom'] >= 3
big_family = Shijingshan_data.loc[is_big_family]

# How the other room types relate to attractiveness within that group.
corr_columns = ['bathRoom', 'kitchen', 'drawingRoom', 'attractiveness']
correlation_matrix = big_family[corr_columns].corr()
print(correlation_matrix)

In [None]:
# "Small family" listings: livingRoom below 3 (the complement of the
# livingRoom >= 3 group, ignoring rows where livingRoom is missing).
is_small_family = Shijingshan_data['livingRoom'] < 3
small_family = Shijingshan_data.loc[is_small_family]

# How the other room types relate to attractiveness within that group.
corr_columns = ['bathRoom', 'kitchen', 'drawingRoom', 'attractiveness']
correlation_matrix = small_family[corr_columns].corr()
print(correlation_matrix)

In [None]:
# Effect of the house's age at trade time on attractiveness: correlation
# among constructionTime, houseAge and attractiveness.
corr_columns = ['constructionTime', 'houseAge', 'attractiveness']
correlation_matrix = Shijingshan_data[corr_columns].corr()
print(correlation_matrix)

In [None]:
# Correlation among houseCondition, houseAge, constructionTime and
# attractiveness over the whole district subset.
corr_columns = ['houseCondition', 'houseAge', 'constructionTime', 'attractiveness']
correlation_matrix = Shijingshan_data[corr_columns].corr()
print(correlation_matrix)

In [None]:
# Correlation between elevator and attractiveness, restricted to the
# high-floor group (floor > 10).
corr_columns = ['elevator', 'attractiveness']
correlation_matrix = high_floor[corr_columns].corr()
print(correlation_matrix)

In [None]:
# Numeric buildingType codes -> readable labels.
type_mapping = {
    1: 'tower',
    2: 'bungalow',
    3: 'combination of plate and tower',
    4: 'plate'
}

# Each label also maps to itself, so re-running this cell keeps the
# already-mapped column intact instead of turning every label into NaN.
# Values that are neither a known code nor a known label still become NaN.
building_type_lookup = dict(type_mapping)
building_type_lookup.update({label: label for label in type_mapping.values()})

# assign() builds a new frame rather than writing into a filtered slice,
# which avoids SettingWithCopyWarning.
Shijingshan_data = Shijingshan_data.assign(
    buildingType=Shijingshan_data['buildingType'].map(building_type_lookup)
)

# Pie chart of how many listings fall under each building type.
building_type_counts = Shijingshan_data['buildingType'].value_counts()
building_type_counts.plot(kind='pie', autopct='%1.1f%%', startangle=140)
plt.title('Building Type Distribution')
plt.ylabel('')  # drop the default y-axis label on the pie
plt.show()

In [None]:
# Mean attractiveness per buildingType, highest first, printed and then drawn
# as a bar chart.
attractiveness_avg = (
    Shijingshan_data
    .groupby('buildingType')['attractiveness']
    .mean()
    .sort_values(ascending=False)
)
print(attractiveness_avg)

ax = attractiveness_avg.plot(kind='bar')
ax.set_title('Average Attractiveness by House Type')
ax.set_xlabel('Building Type')
ax.set_ylabel('Average Attractiveness')
plt.show()

In [None]:
# Drop rows whose buildingStructure equals 1 before analysing structure types.
# NOTE(review): presumably code 1 means "unknown" - confirm against the data
# dictionary. This rebinds Shijingshan_data, so every later cell sees the
# reduced frame.
# .copy() makes the result an independent frame, so assigning the mapped
# labels into it afterwards does not raise SettingWithCopyWarning.
Shijingshan_data = Shijingshan_data[Shijingshan_data['buildingStructure'] != 1].copy()
Shijingshan_data['buildingStructure']

In [None]:
# Numeric buildingStructure codes -> readable labels.
type_mapping = {
    2:"mixed",
    3:"brick and wood", 
    4:"brick and concrete", 
    5:"steel", 
    6:"steel-concrete composite"
}

# Each label also maps to itself, so re-running this cell keeps the
# already-mapped column intact instead of turning every label into NaN.
# Values that are neither a known code nor a known label still become NaN.
structure_lookup = dict(type_mapping)
structure_lookup.update({label: label for label in type_mapping.values()})

# assign() builds a new frame rather than writing into a filtered slice,
# which avoids SettingWithCopyWarning.
Shijingshan_data = Shijingshan_data.assign(
    buildingStructure=Shijingshan_data['buildingStructure'].map(structure_lookup)
)
building_type_counts = Shijingshan_data['buildingStructure'].value_counts()

# Pie `explode` must have exactly one offset per slice. The offsets follow
# value_counts order (largest slice first): with five categories this is the
# original (0, 0, 0.3, 0.2, 0.1); with fewer it is truncated, with more it is
# padded with the last offset, so the plot no longer fails when a structure
# type is absent from the district.
base_explode = (0, 0, 0.3, 0.2, 0.1)
n_slices = len(building_type_counts)
explode_values = base_explode[:n_slices] + (base_explode[-1],) * max(0, n_slices - len(base_explode))

building_type_counts.plot(kind='pie', autopct='%1.1f%%', startangle=140, explode=explode_values)
plt.title('Building Structure Distribution')
plt.ylabel('')  # drop the default y-axis label on the pie
plt.show()

In [None]:
# Mean attractiveness per buildingStructure, highest first, printed and then
# drawn as a bar chart.
attractiveness_avg = (
    Shijingshan_data
    .groupby('buildingStructure')['attractiveness']
    .mean()
    .sort_values(ascending=False)
)
print(attractiveness_avg)

ax = attractiveness_avg.plot(kind='bar')
ax.set_title('Average Attractiveness by Building Structure')
ax.set_xlabel('Building Structure')
ax.set_ylabel('Average Attractiveness')
plt.show()