In [4]:
import team1_module as t1 #モジュールのimport
import pandas as pd

#team1_moduleの改訂版
def read_recipe(n): 
    columns = ['recipeID','UserID','recipe_name','details','servings',
              'advice','upbringing','date']
    loc = 'data/recipe/recipe' + str(n) + '.csv'
    df = pd.read_csv(loc,names=columns,index_col=None)
    return df


def read_report(n):
    columns = ['recipeID','UserID','message','reply','date']
    loc = 'data/report/report' + str(n) + '.csv'
    df = pd.read_csv(loc,names=columns)
    return df


def read_step(n):
    columns = ['recipeID','step_number','note']
    loc = 'data/step/step' + str(n) + '.csv'
    df = pd.read_csv(loc,names=columns)
    return df


def read_ingredient(n):
    columns = ['recipeID','ingredient','size']
    loc = 'data/ingredient/ingredient' + str(n) + '.csv'
    df = pd.read_csv(loc,names=columns)
    return df


def z2h(df):
    hankaku = ["0","1","2","3","4","5","6","7","8","9","/"]
    zenkaku = ["０","１","２","３","４","５","６","７","８","９","／"] 
    columns = df.columns.values
    for i,j in zip(hankaku,zenkaku):
        for k in columns:
            df[k] = df[k].str.replace(j,i)
    return df


def size2g(df): #小、大さじのところだけ変更する
    df['size'] = df['size'].replace('大さじ(\d+).*',r'\1*15',regex=True)
    df['size'] = df['size'].replace('小さじ(\d+).*',r'\1*5',regex=True)
    df['size'] = df['size'].replace('小さじ(\d/\d).*',r'5*\1',regex=True)
    df['size'] = df['size'].replace('(\d+)ｇ',r'\1',regex=True)
    df['size'] = df['size'].replace('(\d+)g',r'\1',regex=True)
    df['size'] = df['size'].replace('少々','0.5')
    df['size'] = df['size'].replace('(\d+)グラム',r'\1',regex=True)
    df['size'] = df['size'].replace('^0(\d+)',r'\1',regex=True)
    df["size"] = df["size"].replace('大(\d+).*',r'\1*15',regex=True)
    df["size"] = df["size"].replace('大匙(\d+).*',r'\1*15',regex=True)
    df['size'] = df['size'].replace('小(\d+).*',r'\1*5',regex=True)
    df['size'] = df['size'].replace('小匙(\d+).*',r'\1*5',regex=True)
                                    
    return df
    

def g2float(df):
    x = df[df['size'].str.match(r'^\d+\.*\**\d*$')].copy()
    x['size'] = x['size'].apply(eval)
    return x


def clean_ingredient(df):
    df = df.fillna('適量')
    df = rmkigou(df)
    df = z2h(df) #半角に直す
    df = size2g(df) #g表現に直す
    df = g2float(df) #数字計算

    return df

def report_popularity(df):
    df = df["recipeID"].value_counts()
    df = df.reset_index()
    df = df.rename(columns={"index":"recipeID","recipeID":"count"})
    return df


def rmkigou(df):
    kigoulist = ["●","☆","◎","※","▲","◯","★"]
    for kigou in kigoulist:
        df["ingredient"] = df["ingredient"].str.replace(kigou,"")
    return df


def sum_seasoning(df):
    df["ingredient"] = df["ingredient"].replace("グラニュー糖","砂糖")
    df["ingredient"] = df["ingredient"].replace("無塩バター","バター")
    df["ingredient"] = df["ingredient"].replace("マーガリン","バター")
    df["ingredient"] = df["ingredient"].replace("しょうゆ","醤油")
    peppers = ["こしょう","胡椒","塩・胡椒","塩・コショウ","塩、こしょう","ブラックペッパー","塩コショウ","塩・塩こしょう","塩,塩こしょう","塩・こしょう","塩胡椒","コショウ"]
    for pepper in peppers:
        df["ingredient"] = df["ingredient"].replace(pepper,"塩こしょう")  
    return df

def mk_vector(df):
    vec = pd.DataFrame(columns = ["recipeID","sugar","salt","butter","pepper","soysource"],index=[])
    vec_dic = {"砂糖":"sugar","塩":"salt","バター":"butter","塩こしょう":"pepper","醤油":"soysource"}

    vec["recipeID"] = df["recipeID"]

    for row in df.itertuples():
        vec.loc[vec["recipeID"]==row.recipeID,vec_dic[row.ingredient]] = row.size
        
    vec = vec.fillna(0)
    return vec

def sum_seasoning_otsumami(df):
    df["ingredient"] = df["ingredient"].replace("料理酒","酒")
    df["ingredient"] = df["ingredient"].replace("お酒","酒")
    df["ingredient"] = df["ingredient"].replace("しょうゆ","醤油")
    df["ingredient"] = df["ingredient"].replace("マヨ","マヨネーズ")
    return df

def mk_vector_otsumami(df):
    vec = pd.DataFrame(columns = ["recipeID","salt","soysource","mayo","sesamioil","sake"],index=[])
    vec_dic = {"塩":"salt","醤油":"soysource","マヨネーズ":"mayo","ごま油":"sesamioil","酒":"sake"}

    vec["recipeID"] = df["recipeID"]

    for row in df.itertuples():
        vec.loc[vec["recipeID"]==row.recipeID,vec_dic[row.ingredient]] = row.size
        
    vec = vec.fillna(0)
    return vec

def extract_something(df,sth):
    x=df[df["recipe_name"].str.contains(sth,na=False)]
    y=df[df["details"].str.contains(sth,na=False)]
    z=x.append(y)
    a=set(z["recipeID"])
    return df[df["recipeID"].isin(a)]
    
    
#データ設定
dflist_ingredient = []
dflist_recipe = []

years = [1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014]

for year in years:
    df = read_ingredient(year)
    dflist_ingredient.append(df)
    df = read_recipe(year)
    dflist_recipe.append(df)
    
    
    
df_ingredient = pd.concat(dflist_ingredient)
df_recipe = pd.concat(dflist_recipe)

otsumami = extract_something(df_recipe,"おつまみ") #おつまみを含むレシピ名を抽出
otsumami_ingredient = df_ingredient[df_ingredient["recipeID"].isin(otsumami["recipeID"])] #材料か
otsumami_ingredient_c = clean_ingredient(otsumami_ingredient)


vec_list = ["塩","醤油","マヨネーズ","ごま油","酒"] #今回参考にする調味料
df_ingredient_c_sum = sum_seasoning_otsumami(otsumami_ingredient_c) #調味料をまとめる
df_ingredient_c_sum_cut = df_ingredient_c_sum[df_ingredient_c_sum["ingredient"].isin(vec_list)]

vec = mk_vector_otsumami(df_ingredient_c_sum_cut)



#酒と醤油の比率とって最頻値
sake = vec[vec["sake"]>0.1 ]
sake = sake[sake["sake"]<200]
sakesoy = sake[sake["soysource"]>0.1]
sakesoy = sakesoy[sakesoy["soysource"]<200]

sakesoy_wariai = {}
for row in sakesoy.itertuples():
    p = str(row.sake/row.soysource)
    if p not in sakesoy_wariai:
        sakesoy_wariai[p] = 1
    else:
        sakesoy_wariai[p] += 1
        
sakesoy_wariai_sorted = sorted(sakesoy_wariai.items(), key=lambda x:x[1],reverse = True)



KeyboardInterrupt: 

In [73]:
df_ingredient_c_sum_cut

Unnamed: 0,recipeID,ingredient,size
64,0a2026b5dfe06d65b796d2cc0952eeb806617679,ごま油,0.5
65,0a2026b5dfe06d65b796d2cc0952eeb806617679,醤油,0.5
395,256a6b526477dbf3bfc395c1c2dab39b52d06f41,塩,0.5
398,256a6b526477dbf3bfc395c1c2dab39b52d06f41,マヨネーズ,15.0
475,3089a9eb2c23220a134743a2446158395f83c6d8,酒,5.0
...,...,...,...
1545325,ffc2e3e9d29f88ec17e9372265c38c5e7e528a83,酒,30.0
1545341,ffc36360ba72d03137fbd75041b57152083cd7f0,醤油,10.0
1545344,ffc36360ba72d03137fbd75041b57152083cd7f0,ごま油,10.0
1545548,ffcc2a26ae2ae496d0ce7c3cba79bebc815759a8,ごま油,15.0


In [54]:
vec_list_e=["salt","soysource","mayo","sesamioil","sake"]


#調味料の比をとって大きい順に
vec3_rate ={}
hozon=[]
for a in vec_list_e:
    for b in vec_list_e:
        if a==b:
            continue
        if (a,b) in hozon:
            continue
        else:
            hozon.append((b,a))
            
        
        vec1 = vec[vec[a]>0.1]
        vec1 = vec1[vec1[a]<200]
        vec2 = vec1[vec1[b]>0.1]
        vec2 = vec2[vec2[b]<200]
        
        ans = vec2[a]/vec2[b]
        ans.value_counts()
        
        vec3_rate [a+"_"+b] =ans.value_counts()

In [55]:
sum_num={}
hozon=[]
for a in vec_list_e:
    for b in vec_list_e:
        if a==b:
            continue
        if (a,b) in hozon:
            continue
        else:
            hozon.append((b,a))
            
        sum_num[a+"_"+b]=vec3_rate[a+"_"+b].values.sum()
        
sort_t=sorted(sum_num.items(),key=lambda x:x[1],reverse=True)

In [56]:
sort_t=sorted(sum_num.items(),key=lambda x:x[1],reverse=True)

In [57]:
sort_t

[('soysource_sake', 5980),
 ('soysource_sesamioil', 4054),
 ('salt_soysource', 2290),
 ('sesamioil_sake', 2166),
 ('salt_sesamioil', 1986),
 ('salt_sake', 1638),
 ('soysource_mayo', 1385),
 ('salt_mayo', 805),
 ('mayo_sake', 535),
 ('mayo_sesamioil', 320)]

In [67]:
vec3_rate["salt_soysource"]

0.033333     446
0.100000     405
1.000000     398
0.333333     319
0.016667     184
0.166667     129
0.050000      99
0.500000      70
10.000000     43
0.011111      39
0.008333      32
0.111111      30
0.666667      20
2.000000      17
0.083333      14
1.500000       9
0.200000       6
0.066667       5
0.006667       3
0.400000       3
0.095238       3
0.300000       2
0.003030       2
0.088889       2
6.000000       2
0.250000       2
30.000000      2
0.025000       2
0.214286       2
dtype: int64

In [87]:
vec3_rate["salt_soysource"].index[0]

0.03333333333333333

In [59]:
vec

Unnamed: 0,recipeID,salt,soysource,mayo,sesamioil,sake
64,0a2026b5dfe06d65b796d2cc0952eeb806617679,0.0,0.5,0.0,0.5,0.0
65,0a2026b5dfe06d65b796d2cc0952eeb806617679,0.0,0.5,0.0,0.5,0.0
395,256a6b526477dbf3bfc395c1c2dab39b52d06f41,0.5,0.0,15.0,0.0,0.0
398,256a6b526477dbf3bfc395c1c2dab39b52d06f41,0.5,0.0,15.0,0.0,0.0
475,3089a9eb2c23220a134743a2446158395f83c6d8,0.0,5.0,30.0,0.0,5.0
...,...,...,...,...,...,...
1545325,ffc2e3e9d29f88ec17e9372265c38c5e7e528a83,0.0,30.0,0.0,0.0,30.0
1545341,ffc36360ba72d03137fbd75041b57152083cd7f0,0.0,10.0,0.0,10.0,0.0
1545344,ffc36360ba72d03137fbd75041b57152083cd7f0,0.0,10.0,0.0,10.0,0.0
1545548,ffcc2a26ae2ae496d0ce7c3cba79bebc815759a8,0.0,0.0,0.0,15.0,0.0


In [68]:
import math

In [70]:
salt_soy_rate1=vec[abs(30*vec["salt"]-vec["soysource"])<0.1]
salt_soy_rate1=salt_soy_rate1[salt_soy_rate1["salt"]>0.1]

In [71]:
salt_soy_rate1

Unnamed: 0,recipeID,salt,soysource,mayo,sesamioil,sake
33332,fb2c8e83cc55a9f9f8d6c341a8fa2755d158e5d1,0.5,15.0,0.0,0.0,0.0
33334,fb2c8e83cc55a9f9f8d6c341a8fa2755d158e5d1,0.5,15.0,0.0,0.0,0.0
55589,bad4e109ae7b503ebecb4085d555aad0197b355e,0.5,15.0,0.0,0.0,0.0
55592,bad4e109ae7b503ebecb4085d555aad0197b355e,0.5,15.0,0.0,0.0,0.0
59513,c956b66d6247c0797b5b06fd35530d432f29b6b5,0.5,15.0,0.0,0.0,0.0
...,...,...,...,...,...,...
1336862,dd03bde488309bd3e5467da36f232d119d1be8c5,0.5,15.0,15.0,0.0,0.0
1336863,dd03bde488309bd3e5467da36f232d119d1be8c5,0.5,15.0,15.0,0.0,0.0
1336864,dd03bde488309bd3e5467da36f232d119d1be8c5,0.5,15.0,15.0,0.0,0.0
1484901,f5cb61186a9e0ddb68988223199742e654529299,0.5,15.0,0.0,0.0,0.0


In [72]:
vec

Unnamed: 0,recipeID,salt,soysource,mayo,sesamioil,sake
64,0a2026b5dfe06d65b796d2cc0952eeb806617679,0.0,0.5,0.0,0.5,0.0
65,0a2026b5dfe06d65b796d2cc0952eeb806617679,0.0,0.5,0.0,0.5,0.0
395,256a6b526477dbf3bfc395c1c2dab39b52d06f41,0.5,0.0,15.0,0.0,0.0
398,256a6b526477dbf3bfc395c1c2dab39b52d06f41,0.5,0.0,15.0,0.0,0.0
475,3089a9eb2c23220a134743a2446158395f83c6d8,0.0,5.0,30.0,0.0,5.0
...,...,...,...,...,...,...
1545325,ffc2e3e9d29f88ec17e9372265c38c5e7e528a83,0.0,30.0,0.0,0.0,30.0
1545341,ffc36360ba72d03137fbd75041b57152083cd7f0,0.0,10.0,0.0,10.0,0.0
1545344,ffc36360ba72d03137fbd75041b57152083cd7f0,0.0,10.0,0.0,10.0,0.0
1545548,ffcc2a26ae2ae496d0ce7c3cba79bebc815759a8,0.0,0.0,0.0,15.0,0.0


In [77]:
otsumami_salt_soy=otsumami_ingredient[otsumami_ingredient["recipeID"].isin(salt_soy_rate1["recipeID"])]

In [90]:
otsumami_salt_soy["ingredient"].value_counts()

塩           149
醤油           78
ごま油          37
酒            35
しょうゆ         33
           ... 
☆生姜           1
●コショウ         1
玉ねぎ(薄切り)      1
干ししいたけ        1
茹で卵           1
Name: ingredient, Length: 612, dtype: int64

In [92]:
result_dict={}
hozon=[]
for a in vec_list_e:
    for b in vec_list_e:
        if a==b:
            continue
        if (a,b) in hozon:
            continue
        else:
            hozon.append((b,a))
        
        first_rate=vec3_rate[a+"_"+b].index[0]
        x=vec[abs(vec[a]-first_rate*vec[b])<0.01]
        x=x[x[a]>0.1]
                
        y=otsumami_ingredient[otsumami_ingredient["recipeID"].isin(x["recipeID"])]
        
        result_dict[a+"_"+b]=y["ingredient"].value_counts()

In [95]:
result_dict["soysource_mayo"][:20]

マヨネーズ      111
醤油          78
しょうゆ        34
アボカド        25
☆マヨネーズ      24
●マヨネーズ      24
★マヨネーズ      21
●醤油         19
★醤油         17
塩           16
☆醤油         14
塩コショウ       13
とろけるチーズ     12
サラダ油        12
ちくわ         11
ごま油         11
酒            9
片栗粉          9
ツナ缶          8
大葉           8
Name: ingredient, dtype: int64

In [111]:
result_dict["soysource_sake"][:40]

酒        678
醤油       518
砂糖       275
みりん      273
ごま油      268
片栗粉      230
しょうゆ     227
☆酒       164
★酒       149
サラダ油     130
水        123
塩        121
★醤油      120
☆醤油      116
●酒       107
☆砂糖      101
●醤油       95
☆みりん      86
★みりん      80
にんにく      77
☆しょうゆ     77
★砂糖       71
しょうが      66
●砂糖       63
●みりん      63
揚げ油       59
小麦粉       58
油         57
こんにゃく     57
塩コショウ     55
卵         53
お酒        52
生姜        50
大葉        49
玉ねぎ       48
★しょうゆ     46
大根        45
バター       43
長ネギ       43
人参        42
Name: ingredient, dtype: int64

In [100]:
result_dict["salt_soysource"][:20]

ごま油     286
酒       166
醤油       82
砂糖       50
塩        47
☆酒       43
片栗粉      38
みりん      37
★酒       35
しょうゆ     34
●酒       28
にんにく     25
ピーマン     22
★醤油      22
☆砂糖      21
☆醤油      19
★砂糖      19
長ネギ      18
水        18
●醤油      17
Name: ingredient, dtype: int64

In [101]:
result_dict["salt_mayo"][:20]


塩         81
マヨネーズ     68
卵         12
サラダ油       9
☆マヨネーズ     9
★マヨネーズ     9
酒          7
★塩         7
片栗粉        7
きゅうり       7
●マヨネーズ     7
☆塩         7
酢          6
揚げ油        6
醤油         6
◎マヨネーズ     6
こしょう       6
餃子の皮       5
じゃがいも      5
ツナ缶        5
Name: ingredient, dtype: int64

In [102]:
result_dict["salt_sesamioil"][:20]


ごま油     139
塩       136
きゅうり     32
醤油       26
☆塩       19
☆ごま油     18
★塩       17
もやし      16
★ごま油     16
酒        16
砂糖       15
キャベツ     15
片栗粉      13
しょうゆ     11
味の素      10
●塩       10
ごま       10
●ごま油      9
★酒        9
すりごま      8
Name: ingredient, dtype: int64

In [103]:
result_dict["salt_sake"][:20]


塩        143
酒        100
ごま油       32
醤油        32
片栗粉       29
サラダ油      22
砂糖        22
★酒        19
こしょう      17
しょうゆ      16
☆酒        15
にんにく      14
★塩        14
マヨネーズ     14
小麦粉       13
みりん       12
お酒        12
コショウ      11
●酒        11
玉ねぎ       11
Name: ingredient, dtype: int64

In [104]:
result_dict["soysource_mayo"][:20]


マヨネーズ      111
醤油          78
しょうゆ        34
アボカド        25
☆マヨネーズ      24
●マヨネーズ      24
★マヨネーズ      21
●醤油         19
★醤油         17
塩           16
☆醤油         14
塩コショウ       13
とろけるチーズ     12
サラダ油        12
ちくわ         11
ごま油         11
酒            9
片栗粉          9
ツナ缶          8
大葉           8
Name: ingredient, dtype: int64

In [105]:
result_dict["soysource_sesamioil"][:20]


ごま油      480
醤油       289
しょうゆ     109
砂糖        96
塩         91
酒         85
☆ごま油      74
きゅうり      70
☆醤油       66
みりん       57
★醤油       54
★ごま油      51
もやし       46
片栗粉       44
にんにく      36
☆しょうゆ     35
☆砂糖       35
●醤油       35
酢         33
長ネギ       33
Name: ingredient, dtype: int64

In [106]:
result_dict["soysource_sake"][:20]


酒       678
醤油      518
砂糖      275
みりん     273
ごま油     268
片栗粉     230
しょうゆ    227
☆酒      164
★酒      149
サラダ油    130
水       123
塩       121
★醤油     120
☆醤油     116
●酒      107
☆砂糖     101
●醤油      95
☆みりん     86
★みりん     80
にんにく     77
Name: ingredient, dtype: int64

In [108]:
result_dict["mayo_sesamioil"][:20]


ごま油       22
マヨネーズ     20
☆マヨネーズ     5
☆ごま油       5
キムチ        5
きゅうり       4
ちくわ        4
醤油         4
塩          4
●マヨネーズ     3
★ごま油       3
★マヨネーズ     3
じゃがいも      3
白菜         2
塩・こしょう     2
しょう油       2
にんにく       2
こしょう       2
★醤油        2
キャベツ       2
Name: ingredient, dtype: int64

In [109]:
result_dict["mayo_sake"][:20]


酒           33
マヨネーズ       32
★マヨネーズ      11
醤油          10
☆マヨネーズ       8
☆酒           7
砂糖           7
ごま油          7
片栗粉          6
大葉           6
水            5
塩            5
●酒           4
調味料          4
サラダ油         4
薄力粉          4
塩コショウ        4
ササミ          3
鶏ささみ         3
オイスターソース     3
Name: ingredient, dtype: int64

In [110]:
result_dict["sesamioil_sake"][:20]

ごま油     286
酒       166
醤油       82
砂糖       50
塩        47
☆酒       43
片栗粉      38
みりん      37
★酒       35
しょうゆ     34
●酒       28
にんにく     25
ピーマン     22
★醤油      22
☆砂糖      21
☆醤油      19
★砂糖      19
長ネギ      18
水        18
●醤油      17
Name: ingredient, dtype: int64