In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from networkx.algorithms import bipartite

%matplotlib inline

In [None]:
# Load the dataset from a CSV hosted on GitHub; read_csv fetches it over the
# network each time this cell runs. Later cells read the 'patent', 'code',
# 'date' and 'year' columns of the resulting frame.
url = 'https://github.com/kmlee419/patent_analysis/raw/main/tesla_ipc_200.csv'
df = pd.read_csv(url)

### 총 특허 수

In [None]:
# Number of distinct values in the 'patent' column.
df['patent'].drop_duplicates().shape[0]

### 총 코드 수

In [None]:
# Number of distinct values in the 'code' column.
df['code'].drop_duplicates().shape[0]

### 대상년도

In [None]:
# Smallest and largest values found in the 'date' column.
dates = df['date']
print(min(dates), max(dates))

### 연도별 총 특허 수 

In [None]:
# Collapse to one row per (patent, year) pair so that a patent listed under
# several codes is counted only once per year.
df2 = df.loc[:, ['patent', 'year']].drop_duplicates()

# Patents per year, flattened back into a two-column frame for plotting.
group_year = df2.groupby('year')['patent'].count().reset_index()

In [None]:
# Line chart of the yearly patent counts held in group_year.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=group_year, x='year', y='patent', ax=ax)
plt.show()

### 가장 많이 사용된 코드 top N list

In [None]:
# Count how many rows carry each code (value_counts orders the result from
# most to least frequent) and turn the result into a regular DataFrame.
code_count = df['code'].value_counts().reset_index()

In [None]:
code_count.head(10) # change the number to adjust N in the top-N list

### 가장 많이 사용된 코드 top N list (연도별)

In [None]:
# Most frequently used codes within a single year.
year = 2012  # change to inspect a different year

in_year = df['year'] == year
df_year = df.loc[in_year]
code_count = df_year['code'].value_counts().reset_index()
code_count.head(20)  # change to show more or fewer ranks

### 코드별 연도변화

In [None]:
# Yearly patent counts for one selected code.
code = 'H01M10/613'  # change to track a different code

df_code = df.loc[df['code'] == code]
# One row per (patent, year) so each patent is counted once per year.
df_code2 = df_code[['patent', 'year']].drop_duplicates()
code_year = df_code2.groupby('year')['patent'].count().reset_index()

fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=code_year, x='year', y='patent', ax=ax)
plt.show()

### 코드별 전년대비 변화량 

In [None]:
# Change in per-code usage between each pair of adjacent years in the data.
# `years` holds the distinct 'year' values in ascending order; entry i is
# compared with entry i+1 (which need not be exactly one calendar year later
# if the data has gaps).
years = list(df['year'].drop_duplicates().sort_values())

# Per-pair results are collected here and concatenated once after the loop,
# instead of growing a DataFrame inside the loop.
diff_frames = []
for i in range(0, len(years)-1):
    year1 = years[i]
    year2 = years[i+1]

    # rename_axis + reset_index(name=...) pins the columns to
    # ['index', 'code'] on every pandas version. A bare
    # value_counts().reset_index() produces ['code', 'count'] from pandas 2.0
    # on, which would break the merge key and the code_x / code_y columns below.
    df_year1 = df[df['year']==year1]
    code_count1 = df_year1['code'].value_counts().rename_axis('index').reset_index(name='code')
    df_year2 = df[df['year']==year2]
    code_count2 = df_year2['code'].value_counts().rename_axis('index').reset_index(name='code')

    # Outer merge keeps codes present in only one of the two years; their
    # missing count is treated as 0.
    merge = pd.merge(code_count1, code_count2, on='index', how='outer')
    merge = merge.fillna(0)
    merge['diff'] = merge['code_y'] - merge['code_x']
    merge['by_year'] = str(year1) + "-" + str(year2)

    diff_frames.append(merge[['index', 'diff', 'by_year']])

# With fewer than two distinct years there is nothing to compare; fall back
# to an empty frame that still has the expected columns.
if diff_frames:
    diff = pd.concat(diff_frames)
else:
    diff = pd.DataFrame(columns=['index', 'diff', 'by_year'])

In [None]:
# Ten rows of `diff` with the largest year-over-year increase.
diff.sort_values(by='diff', ascending=False).head(10) 

In [None]:
# Ten rows of `diff` with the smallest (most negative) year-over-year change.
diff.sort_values(by='diff', ascending=True).head(10)

### 전체 네트워크 분석

In [None]:
def network_generation(df):
    """Build a code-to-code network from a patent/code table.

    A two-mode graph is assembled with the distinct values of ``df['code']``
    on one side (``bipartite=0``) and the distinct values of ``df['patent']``
    on the other (``bipartite=1``), with one edge per (code, patent) row.
    That graph is then projected onto the code side with
    ``bipartite.collaboration_weighted_projected_graph``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``'code'`` and ``'patent'``.

    Returns
    -------
    networkx.Graph
        The weighted projection whose nodes are the codes.
    """
    code_nodes = df['code'].drop_duplicates().tolist()
    patent_nodes = df['patent'].drop_duplicates().tolist()

    two_mode = nx.Graph()
    two_mode.add_nodes_from(code_nodes, bipartite=0)
    two_mode.add_nodes_from(patent_nodes, bipartite=1)
    # Each row of the table links its code to its patent.
    two_mode.add_edges_from(zip(df['code'], df['patent']))

    return bipartite.collaboration_weighted_projected_graph(two_mode, code_nodes)

In [None]:
# Code network built from the full dataset (all years together).
G = network_generation(df)

In [None]:
# Compute four centrality scores for every node of G; each call returns a
# dict keyed by node.
degree, betweenness, closeness, eigenvector = [
    measure(G)
    for measure in (
        nx.degree_centrality,
        nx.betweenness_centrality,
        nx.closeness_centrality,
        nx.eigenvector_centrality,
    )
]

# Store each score on the nodes under an attribute of the same name.
for attribute, scores in (
    ('degree', degree),
    ('betweenness', betweenness),
    ('closeness', closeness),
    ('eigenvector', eigenvector),
):
    nx.set_node_attributes(G, scores, attribute)

In [None]:
# One row per node of G, one column per node attribute, plus 'dbratio':
# betweenness centrality divided by degree centrality.
node_attributes = dict(G.nodes(data=True))
df_g = pd.DataFrame.from_dict(node_attributes, orient='index').assign(
    dbratio=lambda table: table['betweenness'] / table['degree']
)

In [None]:
# Show the codes with the highest betweenness relative to degree first
df_g.sort_values(by='dbratio', ascending=False).head(10)

### 링크별 전년대비 변화량

In [None]:
def _canonical_edge_frame(graph):
    """Return the edges of ``graph`` as a source/target/weight DataFrame.

    The graph is undirected, so the order in which an edge's two endpoints
    are reported carries no meaning and can differ between two graphs that
    contain the same pair. Endpoints are therefore sorted (by their string
    form) so that a given pair always lands in the same source/target
    columns. A graph without edges yields an empty frame that still has all
    three columns.
    """
    rows = []
    for u, v, weight in graph.edges(data='weight'):
        first, second = sorted((u, v), key=str)
        rows.append((first, second, weight))
    frame = pd.DataFrame(rows, columns=['source', 'target', 'weight'])
    return frame.astype({'weight': float})


# Change in link weight between each pair of adjacent years in the data.
years = list(df['year'].drop_duplicates().sort_values())

# Per-pair results are collected here and concatenated once after the loop.
diff_frames = []
for i in range(0, len(years)-1):
    year1 = years[i]
    year2 = years[i+1]

    df_year1 = df[df['year']==year1]
    df_year2 = df[df['year']==year2]

    # One code network per year.
    G1 = network_generation(df_year1)
    G2 = network_generation(df_year2)

    # Canonical endpoint order is required for the merge below: without it
    # the same link could appear as (A, B) in one year and (B, A) in the
    # other, and would be reported as one link vanishing and another appearing.
    df_g1 = _canonical_edge_frame(G1)
    df_g2 = _canonical_edge_frame(G2)

    # Outer merge keeps links present in only one of the two years; their
    # missing weight is treated as 0.
    df_merge = pd.merge(df_g1, df_g2, on=['source', 'target'], how='outer')
    df_merge = df_merge.fillna(0)
    df_merge['diff'] = df_merge['weight_y'] - df_merge['weight_x']
    df_merge['by_year'] = str(year1) + "-" + str(year2)

    diff_frames.append(df_merge[['source', 'target', 'diff', 'by_year']])

# With fewer than two distinct years there is nothing to compare; fall back
# to an empty frame that still has the expected columns.
if diff_frames:
    diff = pd.concat(diff_frames)
else:
    diff = pd.DataFrame(columns=['source', 'target', 'diff', 'by_year'])

In [None]:
# Twenty rows of `diff` with the largest year-over-year increase.
diff.sort_values(by='diff', ascending=False).head(20)