In [75]:
from regression import citylist, loadData
import statsmodels.api as sm, itertools, networkx as nx, community
from sklearn.cluster import KMeans, AgglomerativeClustering

In [2]:
df = loadData(citylist)
cols = df.columns.difference(['lat', 'lon', 'name', 'riders'])
combos = [(a, b) for a, b in itertools.product(cols, cols) if a != b]

In [42]:
data = []
for a, b in combos:
#    a, b = 'near_population', 'near_family'
    y = df[a]
    X = sm.add_constant(df[b])
    result = sm.OLS(y, X).fit()
    rs = result.rsquared
    data.append((a, b, rs))


In [54]:
G = nx.Graph()
for a, b, score in sorted(data, key=lambda x: x[2], reverse=True)[::2]:
    G.add_edge(a, b, weight=score)

In [74]:
nodes_part = community.best_partition(G, weight='weight')
part_louvain = {}
for k, v in nodes_part.items():
    if v in part_louvain:
        part_louvain[v].append(k)
    else:
        part_louvain[v] = [k]

In [78]:
result = KMeans(n_clusters=4).fit(df[cols])
part_kmeans = {}
for part, name in zip(result.predict(df[cols]), cols):
    if part in part_kmeans:
        part_kmeans[part].append(name)
    else:
        part_kmeans[part] = [name]

In [80]:
result = AgglomerativeClustering(n_clusters=4).fit(df[cols])
part_agg = {}
for part, name in zip(result.fit_predict(df[cols]), cols):
    if part in part_agg:
        part_agg[part].append(name)
    else:
        part_agg[part] = [name]

In [92]:
output = []
for i in range(4):
    for j in range(4):
        for k in range(4):
            comlen = len(set(part_kmeans[i]) & set(part_louvain[j]) & set(part_agg[k]))
            avglen = len(part_kmeans[i])+len(part_louvain[j]) + len(part_agg[k])
            output.append((i, j, k, format(comlen*2/avglen)))
for i, j, k, score in sorted(output, key = lambda x: x[3], reverse = True):
    print(i, j, k, score)

1 0 0 0.35514018691588783
2 0 2 0.32558139534883723
0 1 1 0.3055555555555556
3 3 3 0.27906976744186046
3 1 3 0.22727272727272727
0 3 1 0.17142857142857143
1 2 0 0.1518987341772152
3 2 3 0.14634146341463414
2 2 2 0.13861386138613863
2 1 2 0.09345794392523364
0 2 1 0.06060606060606061
2 3 2 0.05714285714285714
2 0 1 0.05084745762711865
2 0 0 0.04838709677419355
1 3 0 0.04819277108433735
3 0 3 0.03636363636363636
3 3 1 0.024691358024691357
0 0 1 0.02127659574468085
2 1 1 0.020833333333333332
2 3 0 0.02
0 0 0 0.0
0 0 2 0.0
0 0 3 0.0
0 1 0 0.0
0 1 2 0.0
0 1 3 0.0
0 2 0 0.0
0 2 2 0.0
0 2 3 0.0
0 3 0 0.0
0 3 2 0.0
0 3 3 0.0
1 0 1 0.0
1 0 2 0.0
1 0 3 0.0
1 1 0 0.0
1 1 1 0.0
1 1 2 0.0
1 1 3 0.0
1 2 1 0.0
1 2 2 0.0
1 2 3 0.0
1 3 1 0.0
1 3 2 0.0
1 3 3 0.0
2 0 3 0.0
2 1 0 0.0
2 1 3 0.0
2 2 0 0.0
2 2 1 0.0
2 2 3 0.0
2 3 1 0.0
2 3 3 0.0
3 0 0 0.0
3 0 1 0.0
3 0 2 0.0
3 1 0 0.0
3 1 1 0.0
3 1 2 0.0
3 2 0 0.0
3 2 1 0.0
3 2 2 0.0
3 3 0 0.0
3 3 2 0.0
