# Get all nouns

In [31]:
pip install pymorphy2

In [32]:
import csv
import re

import pandas as pd
import pymorphy2
# Single analyzer instance, created once here and reused by the noun-selection
# loop below to tag each Russian headword with its part of speech.
morph = pymorphy2.MorphAnalyzer()

## 1. Build a dataframe from the [Agul_dict.csv](https://github.com/nstsi/agul/blob/master/get_noun_forms/Agul_dict.csv) file
Also add an (initially empty) `pos` column for POS tags right away.

In [33]:
# Column labels supplied to the reader: Russian headword, then the Agul entry.
voc_cols = ['rus', 'agul']
# Read the dictionary and attach an empty-string `pos` column in one expression.
df_voc = pd.read_csv('Agul_dict.csv', names=voc_cols).assign(pos='')

In [35]:
# Preview the first five rows to check that both columns were read and that
# the new `pos` column is present (and still empty).
print(df_voc.head(5))

        rus                                               agul pos
0      бязь                                       агъ (ди, ар)    
1     бедро                                        агъ (у, ар)    
2   бродить  адаркас (адаркай, адаркуна, адарк, адаркуб, ад...    
3  больница                                           азархана    
4    балкон                                     айван (ди, ар)    


## 2. Get lists with nouns

In [38]:
good_nouns = []  # nouns for which all forms are given, e.g. "агъ (ди, ар)"
bad_nouns = []  # nouns for which only the Sg Nom form is given, e.g. "азархана"

for rus, agul in zip(df_voc['rus'], df_voc['agul']):
    # Empty CSV cells are read by pandas as NaN (a float); neither the analyzer
    # nor re.sub accepts that, so skip incomplete rows instead of crashing.
    if not isinstance(rus, str) or not isinstance(agul, str):
        continue

    # Keep only entries whose Russian headword is tagged NOUN in the first
    # parse returned by the analyzer.
    if morph.parse(rus)[0].tag.POS != 'NOUN':
        continue

    # Replace Latin capital "I" and the digit "1" with Palochka ("ӏ").
    # This is applied to every noun, so both output lists use the same
    # spelling (previously only the "good" list received the normalised form).
    agul_norm = re.sub('[I1]', 'ӏ', agul)

    if '(' in agul_norm:
        # "good" nouns carry their forms in parentheses: агъ (ди, ар)
        good_nouns.append([rus, agul_norm])
    else:
        # "bad" nouns have no parentheses: азархана
        bad_nouns.append([rus, agul_norm])

## 3. Write nouns into csv files [good_nouns.csv](https://github.com/nstsi/agul/blob/master/get_noun_forms/results/good_nouns.csv) and [bad_nouns.csv](https://github.com/nstsi/agul/blob/master/get_noun_forms/results/bad_nouns.csv)

In [37]:
def list_to_csv(list_: list, name_: str) -> None:
    """Write ``[rus, agul]`` pairs to ``<name_>.csv`` and print a five-row preview.

    Parameters
    ----------
    list_ : list
        Rows of the form ``[rus, agul]``. May be empty, in which case a
        header-only file is written.
    name_ : str
        Output path without the ``.csv`` extension.

    The saved (and printed) frame has the ``agul`` column first, then ``rus``.
    """
    # Passing `columns=` to the constructor (instead of assigning `df.columns`
    # afterwards) also works for an empty list, which would otherwise raise a
    # length-mismatch ValueError.
    df = pd.DataFrame(list_, columns=['rus', 'agul'])
    df = df[['agul', 'rus']]
    csv_name = name_ + '.csv'
    df.to_csv(csv_name, index=False)
    print(df.head(5))

In [None]:
# Save the nouns whose Agul entry lists forms in parentheses to good_nouns.csv.
list_to_csv(good_nouns, 'good_nouns')

                   agul     rus
0          агъ (ди, ар)    бязь
1           агъ (у, ар)   бедро
2        айван (ди, ар)  балкон
3  Аллагь (Аллагьди, -)     Бог
4       алухӏуб (а, ар)   брань


In [None]:
# Save the nouns whose Agul entry has no parenthesised forms to bad_nouns.csv.
list_to_csv(bad_nouns, 'bad_nouns')

        agul       rus
0   азархана  больница
1  аттрухъуб      бред
2   ахттилат    беседа
3    багувел  близость
4       бала      беда
