In [4]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML
import os
import plotly.express as px
import plotly
import plotly.graph_objects as go
import plotly as py
import plotly.offline as offline
offline.init_notebook_mode(connected=True)
from plotly.subplots import make_subplots

In [5]:
all_cases = pd.read_csv('/Users/starice/OwnFiles/cityu/RA/case_study/data/total_extracted_result/all_cases.csv', encoding="utf-8")
all_cases['case_id'].drop_duplicates()
all_cases = all_cases[all_cases['defendant'] != all_cases['lawyer']]

<h2>Read and merge all case reason csv files</h2>

In [15]:
files = os.listdir("/Users/starice/OwnFiles/cityu/RA/case_study/case_study_result/case_reasons")
df = pd.DataFrame()
for file in files:
    file_path = "/Users/starice/OwnFiles/cityu/RA/case_study/case_study_result/case_reasons/" + file
    tdf = pd.read_csv(file_path)
    df = df.append(tdf, ignore_index=True)

df = df.drop('Unnamed: 0', axis=1)

In [59]:
reasonDf = all_cases.merge(df, on="case_id", how='inner')
reasonDf = reasonDf.drop('Unnamed: 0', axis=1)
reasonDf = reasonDf[['case_id', 'judgement_date', 'is_success', 'procedure', 
                    'legalfee', 'objectmoney', 'penalty', 'province', 'city', 
                    'district', 'year', 'month', 'day', 'lawyer', 'case_reason', 
                    'is_import']].drop_duplicates().reset_index().drop('index', axis=1)

reasonDf['case_reason'] = reasonDf['case_reason'].apply(lambda r: eval(r))
reasonDf = reasonDf.explode('case_reason').reset_index(drop=True)

reasonDict = {
    "reason1": "标签、配料表、外包装违规", 
    "reason2": "假冒产品", 
    "reason3": "保质期、生产日期", 
    "reason4": "原材料、添加剂", 
    "reason5": "商标", 
    "reason6": "出入境检验、检疫证明、来源证明", 
    "reason7": "生产许可证、生产标准、证明文件", 
    "reason8": "餐饮食品安全", 
    "reason9": "是否为真实消费者", 
    "reason10": "食品无国家标准或未通过安全性评估", 
    "reason11": "产品质检不合格、质量有问题"
}

reasonDf['case_reason_txt'] = reasonDf['case_reason'].map(reasonDict)
reasonDf.head(1)

Unnamed: 0,case_id,judgement_date,is_success,procedure,legalfee,objectmoney,penalty,province,city,district,year,month,day,lawyer,case_reason,is_import,case_reason_txt
0,57ab9058c2265c28a560195d,2014-01-13,True,一审,244.0,374.48,3744.8,浙江省,杭州市,西湖区,2014,1,13,,reason7,False,生产许可证、生产标准、证明文件


In [97]:
a = list(reasonDf[reasonDf['case_reason_txt']=="假冒产品"]['case_id'].drop_duplicates())
with open("假冒产品_cid.txt", "w") as f:
    for i in a:
        f.write(i)
        f.write("\n")

<h2>Compute reason distribution for cases</h2>

In [75]:
reasonDist_cc = reasonDf.groupby('case_reason_txt')['case_id'].nunique().reset_index()
reasonDist_cc.rename(columns={"case_id": "case_count"}, inplace=True)

reasonDist_sc = reasonDf[reasonDf['is_success']=='TRUE'].groupby('case_reason_txt')['case_id'].nunique().reset_index()
reasonDist_sc.rename(columns={"case_id": "success_count"}, inplace=True)
reasonDist = reasonDist_cc.merge(reasonDist_sc, how="left").fillna(0)

In [93]:
reasonDist

Unnamed: 0,case_reason_txt,case_count,success_count
0,产品质检不合格、质量有问题,4385,2605.0
1,保质期、生产日期,13980,12756.0
2,假冒产品,122,72.0
3,出入境检验、检疫证明、来源证明,1704,1244.0
4,原材料、添加剂,3913,3117.0
5,商标,94,56.0
6,是否为真实消费者,6,0.0
7,标签、配料表、外包装违规,21026,14934.0
8,生产许可证、生产标准、证明文件,1248,935.0
9,食品无国家标准或未通过安全性评估,31,29.0


In [83]:
reasonDist_year = reasonDf.groupby(['year', 'case_reason_txt'])['case_id'].nunique().reset_index()
reasonDist_year.rename(columns={"case_id": "case_count"}, inplace=True)

In [84]:
reasonDist_year.head(1)

Unnamed: 0,year,case_reason_txt,case_count
0,2014,产品质检不合格、质量有问题,138


<h2>Display</h2>

<h3>Case count distribution</h3>

In [80]:
fig = px.bar(
    reasonDist,
    x = 'case_reason_txt', 
    y = ['case_count', 'success_count'], 
    barmode="group"
)
fig.show()

<h3>Reason count distribution with year</h3>

In [86]:
fig = px.line(
    reasonDist_year, 
    x = 'year', 
    y = 'case_count', 
    color = "case_reason_txt"
    
)
fig.show()
# 案件原因处理时要去掉公益诉讼类案件

<h3>Distribution of legalfee, objectmoney, and penalty with different reasons</h3>

In [89]:
fig_legalfee = px.box(
    reasonDf[['case_reason_txt', 'legalfee']].drop_duplicates(), 
    x = 'case_reason_txt', 
    y = 'legalfee'
)
fig_legalfee.update_yaxes(type="log")
fig_legalfee.show()

In [90]:
fig_objm = px.box(
    reasonDf[['case_reason_txt', 'objectmoney']].drop_duplicates(), 
    x = 'case_reason_txt', 
    y = 'objectmoney'
)
fig_objm.update_yaxes(type="log")
fig_objm.show()

In [92]:
fig_penalty = px.box(
    reasonDf[reasonDf['is_success']=="TRUE"][['case_reason_txt', 'penalty']].drop_duplicates(), 
    x = 'case_reason_txt', 
    y = 'penalty'
)
fig_penalty.update_yaxes(type="log")
fig_penalty.show()