# Create noun forms

In [None]:
import re
import pandas as pd

## Manually correct the file [good_nouns.csv](https://github.com/nstsi/agul/blob/master/get_noun_forms/results/good_nouns.csv)
1. Remove all words that aren't actually nouns.
2. Keep only the nouns for which suffixes are given, and move the nouns for which full forms are given to [form_nouns.csv](https://github.com/nstsi/agul/blob/master/get_noun_forms/results/form_nouns.csv).
3. Keep only one-word nouns.
4. Save the file as [suffix_nouns.csv](https://github.com/nstsi/agul/blob/master/get_noun_forms/results/suffix_nouns.csv).

## 1. Make dataframes out of csv files

In [None]:
# Load the nouns whose forms are described by suffixes from 'suffix_nouns.csv'
# into a dataframe.
df_suffix_nouns = pd.read_csv('suffix_nouns.csv')

In [None]:
# Preview the first three rows of the suffix-noun dataframe.
print(df_suffix_nouns.head(3))

              agul     rus
0  ӏаб (а/ала, ар)  люлька
1   ӏатӏуб (а, ар)  пробор
2     ӏачӏ (а, ар)    щель


In [None]:
# Load the nouns whose forms are written out in full from 'form_nouns.csv'
# into a dataframe.
df_form_nouns = pd.read_csv('form_nouns.csv')

In [None]:
# Preview the first three rows of the full-form noun dataframe.
print(df_form_nouns.head(3))

                   agul
0   ӏашв (ӏашу, ӏашвар)
1        ӏекв (ӏеку, -)
2  Аллагь (Аллагьди, -)


## 2. Create lists for every form

In [None]:
# One independent, initially empty accumulator per grammatical form:
# Sg Nom, Sg Erg and Pl Nom.
SgNoms, SgErgs, PlNoms = [], [], []

## 3. Build the *Sg Erg* and *Pl Nom* forms by attaching the respective suffixes, and add them to the respective lists.

In [None]:
# Build 'lemma:form' entries for the nouns whose inflection is given as
# suffixes. Each 'agul' cell looks like 'ӏатӏуб (а, ар)': the Sg Nom lemma,
# then the Sg Erg suffix and the Pl Nom suffix in parentheses. A '-' means the
# suffix isn't given (nothing is written for it); a '/' separates alternative
# suffixes, each of which yields its own entry.
for _, row in df_suffix_nouns.iterrows():

    # a row that looks like 'ӏатӏуб (а, ар)' is split in two:
    row_agul = row['agul'].split(' (')
    lemma = row_agul[0]
    # the Sg Nom form is the lemma itself, so it is stored right away:
    SgNoms.append(lemma)

    # drop the closing parenthesis, then separate the two suffix slots
    suffixes = row_agul[1].replace(')', '').split(', ')

    if suffixes[0] != '-':
        # one Sg Erg entry per alternative suffix ('а/ала' gives two entries)
        SgErgs.extend(
            [lemma + ':' + lemma + suffix for suffix in suffixes[0].split('/')]
        )

    if suffixes[1] != '-':
        # alternatives are expanded here as well, so that a cell such as
        # 'ар/ер' does not end up as a single malformed form
        PlNoms.extend(
            [lemma + ':' + lemma + suffix for suffix in suffixes[1].split('/')]
        )

## 4. Take the *Sg Erg* and *Pl Nom* forms that are given in full and add them to the respective lists.

In [None]:
# Collect 'lemma:form' entries for the nouns whose inflected forms are written
# out in full. Each 'agul' cell looks like 'ӏашв (ӏашу, ӏашвар)': the Sg Nom
# lemma, then the Sg Erg form and the Pl Nom form in parentheses. A '-' means
# the form isn't given (nothing is written for it); a '/' separates
# alternative forms, each of which yields its own entry.
for _, row in df_form_nouns.iterrows():

    # a row that looks like "ӏашв (ӏашу, ӏашвар)" is split in two:
    row_agul = row['agul'].split(' (')
    lemma = row_agul[0]
    # the Sg Nom form is the lemma itself, so it is stored right away:
    SgNoms.append(lemma)

    # drop the closing parenthesis, then separate the two form slots
    forms = row_agul[1].replace(')', '').split(', ')

    if forms[0] != '-':
        # every alternative is built from the current row only, so no value
        # left over from an earlier row or cell can be appended by mistake
        SgErgs.extend([lemma + ':' + form for form in forms[0].split('/')])

    if forms[1] != '-':
        PlNoms.extend([lemma + ':' + form for form in forms[1].split('/')])

## 5. Write forms into respective csv files 

In [None]:
def list_to_csv(list_: list, name_: str):
    """Save *list_* as a one-column csv file and print its first three rows.

    The file is named ``name_`` followed by ``'.csv'`` and is written without
    the index column.
    """
    frame = pd.DataFrame(list_)
    frame.to_csv(name_ + '.csv', index=False)
    preview = frame.head(3)
    print(preview)

In [None]:
# Write the collected Sg Nom forms to 'SgNoms.csv' and preview the first three.
list_to_csv(SgNoms, 'SgNoms')

        0
0     ӏаб
1  ӏатӏуб
2    ӏачӏ


In [None]:
# Write the collected Sg Erg entries to 'SgErgs.csv' and preview the first three.
list_to_csv(SgErgs, 'SgErgs')

                0
0        ӏаб:ӏаба
1      ӏаб:ӏабала
2  ӏатӏуб:ӏатӏуба


In [None]:
# Write the collected Pl Nom entries to 'PlNoms.csv' and preview the first three.
list_to_csv(PlNoms, 'PlNoms')

                 0
0        ӏаб:ӏабар
1  ӏатӏуб:ӏатӏубар
2      ӏачӏ:ӏачӏар
