In [148]:
import pandas as pd
import folium
from folium import plugins
import geopandas as gpd
# Initial view location handed to folium.Map(location=...) inside draw_map;
# presumably a (latitude, longitude) pair — confirm against the data source.
center = (37.1623799231016, 127.05436890115905)

In [149]:
# Read the feature25 / feature50 CSV tables into DataFrames (paths are
# relative to the notebook's working directory).
data25, data50 = (
    pd.read_csv(csv_path)
    for csv_path in ("data/feature25.csv", "data/feature50.csv")
)

# 선정 지점 시각화

In [150]:
def draw_map(point_data):
    """Build a folium map with the region outline, the road network and the given points.

    Parameters
    ----------
    point_data : pandas.DataFrame
        Table with ``lat`` and ``lon`` columns. Two red circles (radius 1 and
        radius 25) are drawn for every row; an empty table draws none.

    Returns
    -------
    folium.Map
        Map centred on the module-level ``center`` with three toggleable
        layers ('osan region', 'road', 'check_point') grouped under the
        parent feature group 'all_thing', plus an expanded layer control.
    """
    map1 = folium.Map(location=center, zoom_start=13, control_scale=True)

    # Parent group: every layer below is registered as a sub-group of it.
    all_subgroups = folium.FeatureGroup(name='all_thing')
    map1.add_child(all_subgroups)

    # Region boundary, drawn as a grey outline without fill.
    osan_region = plugins.FeatureGroupSubGroup(all_subgroups, 'osan region')
    map1.add_child(osan_region)
    osan = gpd.read_file('../data/original/33.오산시_법정경계(읍면동).geojson', encoding="utf-8")
    # GeoJson style dicts are passed straight to Leaflet, hence camelCase keys here.
    folium.GeoJson(
        osan,
        style_function=lambda x: {'fillColor': 'transparent', 'color': 'grey'},
    ).add_to(osan_region)

    # Road network, without the rows whose road_name contains "고속도로".
    road_data = gpd.read_file('../data/original/23.오산시_상세도로망_LV6.geojson', encoding="utf-8")
    road_data = road_data[road_data["road_name"].apply(lambda x: "고속도로" not in str(x))].reset_index(drop=True)
    road = plugins.FeatureGroupSubGroup(all_subgroups, 'road')
    map1.add_child(road)
    # Line weight grows with the feature's "width" property (width ** 1.5 / 2).
    folium.GeoJson(
        road_data,
        style_function=lambda x: {
            'fillColor': 'transparent',
            "weight": int(x["properties"]["width"]) ** 1.5 / 2,
            'color': 'black',
        },
    ).add_to(road)

    # Points: a small dot plus a 25-radius circle per row.
    check_point = plugins.FeatureGroupSubGroup(all_subgroups, 'check_point')
    map1.add_child(check_point)
    # Iterate over the column values rather than .loc[i] so the function also
    # works when point_data does not carry a default 0..n-1 index; tolist()
    # yields plain Python floats, which keeps the tooltip text clean.
    for lat, lon in zip(point_data["lat"].tolist(), point_data["lon"].tolist()):
        tooltip = str((lon, lat))
        for radius in (1, 25):
            folium.Circle(
                location=[lat, lon],
                radius=radius,
                color="red",
                # folium's keyword is snake_case `fill_color`; it also turns the fill on.
                fill_color="red",
                tooltip=tooltip,
            ).add_to(check_point)

    folium.LayerControl(collapsed=False).add_to(map1)
    return map1


In [151]:
# draw_map(data50)

# 회귀, 분류 분석

In [152]:
# Boolean flags: True where the corresponding shortest_*_dist value is below
# the threshold. The thresholds differ between the two tables.
data25['is_bump20'] = data25['shortest_bump_dist'].lt(20)
data50['is_bump40'] = data50['shortest_bump_dist'].lt(40)
data25['is_sidewalk12.5'] = data25['shortest_sidewalk_dist'].lt(12.5)
data50['is_sidewalk25'] = data50['shortest_sidewalk_dist'].lt(25)

# Column selections used by the modelling cells. "x_25" and "x_50" list the
# table-specific features in the same order, so they correspond by position.
col = {
    "x_common": [
        'overspeed_cam_count100',
        'floating_pop_count50',
        'car_count1000',
        'child_count1000',
        'elem_kinder_count400',
        'numberSchoolZone_count400',
        'num_cram_school_count400',
        'shortest_cross_dist',
        'shortest_traffic_signal_dist',
    ],
    "x_25": [
        'is_bump20',
        'parking_count12.5',
        'is_sidewalk12.5',
        'barrier_nearby_count12.5',
        'chaos1_nearby_count12.5',
        'width_nearby_count12.5',
        'cross_road_nearby_count12.5',
    ],
    "x_50": [
        'is_bump40',
        'parking_count25',
        'is_sidewalk25',
        'barrier_nearby_count25',
        'chaos1_nearby_count25',
        'width_nearby_count25',
        'cross_road_nearby_count25',
    ],
    "y_25": 'accident_count12.5',
    "y_50": 'accident_count25',
}

## 회귀

### x, y 데이터 준비

In [153]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Feature columns followed by the target as the last column, all cast to float.
xy25 = data25[col["x_common"] + col["x_25"] + [col['y_25']]].astype(float)
xy50 = data50[col["x_common"] + col["x_50"] + [col['y_50']]].astype(float)

# The min-max scaler is fitted on the 50 table and re-used for the 25 table.
# The two tables name their table-specific columns differently (e.g.
# 'is_bump40' vs 'is_bump20'), so plain arrays are passed: columns are then
# matched by position and scikit-learn's feature-name validation (a warning
# in 1.0/1.1, an error from 1.2) is not triggered.
scaler = MinMaxScaler()
xy50.loc[:, :] = scaler.fit_transform(xy50.to_numpy())
xy25.loc[:, :] = scaler.transform(xy25.to_numpy())

# Split into features (all but the last column) and target (last column).
x50, y50 = xy50.iloc[:, :-1], xy50.iloc[:, -1]
x25, y25 = xy25.iloc[:, :-1], xy25.iloc[:, -1]


### Linear Regression

In [158]:
from sklearn.linear_model import LinearRegression

# `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2. It is
# dropped here: ordinary least squares predictions do not depend on feature
# scaling, so the fitted predictions and the score are unaffected.
lr = LinearRegression()

# Fit on the 50 table, report R^2 on the 25 table. Arrays are passed because
# the two feature frames use different column names and correspond by position.
lr.fit(x50.to_numpy(), y50.to_numpy())
lr.score(x25.to_numpy(), y25.to_numpy())

-0.09025494115918864

### RandomForestRegressor

In [159]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV

# Fixed seed so the reported score is reproducible across runs.
rf = RandomForestRegressor(random_state=0)

# Fit on the 50 table, report R^2 on the 25 table. Arrays are passed because
# the two feature frames use different column names and correspond by position.
rf.fit(x50.to_numpy(), y50.to_numpy())
rf.score(x25.to_numpy(), y25.to_numpy())

-0.24310898963642402

## 분류 (classification)


### x, y 데이터 준비

In [178]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Feature columns followed by the target as the last column, all cast to float.
xy25 = data25[col["x_common"] + col["x_25"] + [col['y_25']]].astype(float)
xy50 = data50[col["x_common"] + col["x_50"] + [col['y_50']]].astype(float)

# Binarise the target: 1.0 where the accident count is greater than 1, else 0.0.
xy25[col['y_25']] = (xy25[col['y_25']] > 1).astype(float)
xy50[col['y_50']] = (xy50[col['y_50']] > 1).astype(float)

# The min-max scaler is fitted on the 50 table and re-used for the 25 table.
# The two tables name their table-specific columns differently (e.g.
# 'is_bump40' vs 'is_bump20'), so plain arrays are passed: columns are then
# matched by position and scikit-learn's feature-name validation (a warning
# in 1.0/1.1, an error from 1.2) is not triggered.
scaler = MinMaxScaler()
xy50.loc[:, :] = scaler.fit_transform(xy50.to_numpy())
xy25.loc[:, :] = scaler.transform(xy25.to_numpy())

# Split into features (all but the last column) and target (last column).
x50, y50 = xy50.iloc[:, :-1], xy50.iloc[:, -1]
x25, y25 = xy25.iloc[:, :-1], xy25.iloc[:, -1]


In [179]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [180]:
# The unused RandomForestClassifier() instance that was immediately
# overwritten has been removed. Despite its name, `rf` is a logistic
# regression; the name is kept because the following cell reads `rf.coef_`.
rf = LogisticRegression(max_iter=1000)
rf.fit(x50, y50)

# In-sample accuracy: scored on the same 50 table the model was fitted on.
rf.score(x50, y50)


0.9900306748466258

In [181]:
# Pair each coefficient with a feature name. The names are taken from x25
# while the model was fitted on x50; the two frames list their features in
# the same order, so the pairing is positional.
[(feature_name, weight) for feature_name, weight in zip(x25.columns, rf.coef_[0])]

[('overspeed_cam_count100', 0.6125919910937541),
 ('floating_pop_count50', 1.0187668516245527),
 ('car_count1000', -0.10867463389288161),
 ('child_count1000', -0.37865344501889603),
 ('elem_kinder_count400', 2.1246793058962012),
 ('numberSchoolZone_count400', 0.4670634215170662),
 ('num_cram_school_count400', 1.5594099353069637),
 ('shortest_cross_dist', -0.8119145826177833),
 ('shortest_traffic_signal_dist', -0.7999288014223602),
 ('is_bump20', -0.31145057102441903),
 ('parking_count12.5', 1.1007610248765614),
 ('is_sidewalk12.5', 0.6836415346755382),
 ('barrier_nearby_count12.5', -0.029950565790325692),
 ('chaos1_nearby_count12.5', 1.5054667043925631),
 ('width_nearby_count12.5', 0.45808994615872856),
 ('cross_road_nearby_count12.5', 0.9105081930168608)]

# 클러스터링

In [176]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Feature columns followed by the target as the last column, all cast to float.
xy25 = data25[col["x_common"] + col["x_25"] + [col['y_25']]].astype(float)
xy50 = data50[col["x_common"] + col["x_50"] + [col['y_50']]].astype(float)

# Binarise the target: 1.0 where the accident count is greater than 1, else 0.0.
xy25[col['y_25']] = (xy25[col['y_25']] > 1).astype(float)
xy50[col['y_50']] = (xy50[col['y_50']] > 1).astype(float)

# The min-max scaler is fitted on the 50 table and re-used for the 25 table.
# The two tables name their table-specific columns differently (e.g.
# 'is_bump40' vs 'is_bump20'), so plain arrays are passed: columns are then
# matched by position and scikit-learn's feature-name validation (a warning
# in 1.0/1.1, an error from 1.2) is not triggered.
scaler = MinMaxScaler()
xy50.loc[:, :] = scaler.fit_transform(xy50.to_numpy())
xy25.loc[:, :] = scaler.transform(xy25.to_numpy())

In [218]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Fixed seed so the clustering, and therefore the silhouette score, is
# reproducible. n_init is pinned to 10 (the long-standing default) because
# the default changed in newer scikit-learn releases.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=0, max_iter=10000).fit(xy25)

# Silhouette score of the cluster assignment on the data used for fitting.
silhouette_score(xy25, kmeans.predict(xy25))

0.3624818296697029

In [None]:
from sklearn.cluster import AffinityPropagation

# Fit affinity propagation (seeded) on the 25 table, then score the
# resulting labels with the silhouette coefficient.
clustering = AffinityPropagation(random_state=5)
clustering.fit(xy25)
silhouette_score(xy25, clustering.labels_)