In [1]:
import csv
import io
import json

import pandas

In [2]:
def cross_join(left, right):
    """Return the Cartesian product of two DataFrames.

    Both frames get a temporary constant column and are merged on it. The
    merge is an *outer* join on purpose: when one side has no rows (e.g. a
    freshly created ``pandas.DataFrame()``), the rows of the other side are
    kept instead of the product collapsing to nothing.

    Args:
        left: left-hand DataFrame.
        right: right-hand DataFrame.

    Returns:
        A new DataFrame with the columns of ``left`` followed by the columns
        of ``right``.
    """
    # Use a name that real data will not carry, so a genuine 'key' column
    # in either frame is not overwritten by the helper column.
    join_col = '__cross_join_key__'
    merged = left.assign(**{join_col: 1}).merge(
        right.assign(**{join_col: 1}), on=join_col, how='outer'
    )
    # `axis` can no longer be passed positionally to drop() in pandas >= 2.0.
    return merged.drop(columns=join_col)

def json_to_dataframe(data_in):
    """Flatten nested JSON-like data into a single DataFrame.

    Dict keys become dotted column names (``outer.inner``), the values of
    sibling keys are combined with ``cross_join``, and list elements are
    stacked as rows.

    Args:
        data_in: a dict / list / scalar structure, e.g. the result of
            ``json.load``. Dict keys must be strings.

    Returns:
        pandas.DataFrame with one column per leaf path.
    """
    def to_frame(data, prev_key=''):
        # prev_key is the dotted path so far, carrying a leading '.' that is
        # stripped when a leaf column is created. It must start as a string:
        # a None default makes the concatenation / slicing below raise.
        if isinstance(data, dict):
            df = pandas.DataFrame()
            for key in data:
                df = cross_join(df, to_frame(data[key], prev_key + '.' + key))
        elif isinstance(data, list):
            # Build all pieces first and concatenate once instead of growing
            # the frame inside the loop; concat() rejects an empty list.
            frames = [to_frame(item, prev_key) for item in data]
            df = pandas.concat(frames) if frames else pandas.DataFrame()
        else:
            df = pandas.DataFrame({prev_key[1:]: [data]})
        return df
    return to_frame(data_in)



In [3]:
def normalize_json(data: dict):
    """Flatten one level of nesting in ``data``.

    Entries whose value is a dict are replaced by one entry per inner key,
    named ``<outer>_<inner>``; all other entries are copied unchanged.
    Deeper levels are left as they are.
    """
    flattened = {}
    for outer_key, outer_value in data.items():
        if isinstance(outer_value, dict):
            # Lift the nested entries up, prefixing them with the parent key.
            flattened.update(
                (outer_key + "_" + inner_key, inner_value)
                for inner_key, inner_value in outer_value.items()
            )
            continue
        flattened[outer_key] = outer_value
    return flattened
  
  
def generate_csv_data(data: dict):
    """Render a flat dict as a two-line CSV string: a header and one record.

    Args:
        data: mapping of column name to value. Values are converted with
            ``str()``.

    Returns:
        The CSV text: header row and record row, each terminated by
        a newline. Fields are quoted only when they need it (they contain
        a comma, a quote or a line break), so simple values come out exactly
        as a plain comma-join would produce them.
    """
    # A dict keeps insertion order, which fixes the column order for both rows.
    csv_columns = list(data.keys())

    # Let the csv module handle quoting/escaping; joining with "," by hand
    # corrupts the output as soon as a value contains a comma or a newline.
    buffer = io.StringIO(newline="")
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerow(csv_columns)
    writer.writerow([str(data[col]) for col in csv_columns])

    return buffer.getvalue()

In [4]:

# Parse the raw song dataset with the standard json module.
# NOTE(review): json_data is not referenced by any later cell visible here;
# the DataFrame used below is loaded separately with pd.read_json.
with open('../data/song_dataV6.json', 'r', encoding='utf-8') as json_file:
    json_data = json.load(json_file)


In [5]:
import pandas as pd

In [6]:
# Load the song dataset into a DataFrame: one row per song, with the
# per-song annotations kept as a list of dicts in the 'Annotation' column.
df = pd.read_json('../data/song_dataV6.json')

In [7]:
# Preview the first rows to check the parsed columns.
df.head()

Unnamed: 0,id,title,topic,Lyrics in english,Lyrics in sinhala,Singer,Music,Lyrics,Annotation
0,1,Aadaraya sundara varadaki,love life,Aadaraya sundara varadaki\nkisidaa samaavak na...,ආදරය සුන්දර වරදකි\nකිසිදා සමාවක්‌ නම් නැති\nමට...,Karunarathna Diwlgane,Gunadasa Kapuge,Yamuna Malini Perera,"[{'Line': 6, 'Metaphore': 'nuvan daaley', 'Obj..."
1,2,Aadaraye ulpatha wu amma,motherhood,Aadaraye ulpatha wu amma\nMaa obage puthu wu……...,ආදරයේ උල්පත වූ අම්මා\nමා ඔබගේ පුතු වූ..\nඔබ මත...,Victor Rathnayake,Victor Rathnayake,Premakeerthi De Alwis,"[{'Line': 2, 'Metaphore': 'Aadaraye ulpatha wu..."
2,3,Aalen wela ganna kolam kala,staring or blooming love,Aalen wela ganna kolam kala\nHitha aalen muda ...,ආලෙන් වෙලා ගන්න කෝලම් කලා\nහිත ආලෙන් මුදා ගන්න...,Athula Adikari With Samitha Mudunkotuwa,Madawa Hewawasam,Madawa Hewawasam,"[{'Line': 2, 'Metaphore': 'aadare bhawanawak w..."
3,4,Ada dee geka giya,broken love & sorrow,Ada dee geka giya nuba mata Bulath athak deela...,අද දීගෙක ගිය නුඹ මට බුලත් අතක් දීලා\nයන්න පිටත...,Sunil Edirisinghe,Madawa Hewawasam,Madawa Hewawasam,"[{'Line': 4, 'Metaphore': 'Nethe kandulu sirak..."
4,5,Adara mal wala,staring or blooming love,Adara mal wala… pata kiyannada\nEwage lassana…...,ආදර මල් වල පාට කියන්නද\nමේ වගෙ ලස්සන තව නෑ\nමේ...,Kasun Kalhara,Nawarathna Gamage,Wasantha Kumara Kobawaka,"[{'Line': 2, 'Metaphore': 'mal sitha', 'Object..."


In [8]:
# Export the full frame, including the nested 'Annotation' column, without
# the row index.
df.to_csv('../data/data.csv', index=False)

In [9]:
# Work on a copy so the re-indexing that follows does not touch df itself.
df1 = df.copy()

In [10]:
# Index the songs by their id. Derive the frame from df rather than mutating
# df1 in place: an in-place set_index raises KeyError when the cell is run a
# second time, because 'id' is then no longer a column.
df1 = df.set_index('id')

In [11]:
# Series of annotation lists, keyed by song id (the index of df1).
anns = df1['Annotation']

In [12]:
# Debug peek: items() yields (song id, annotation list) pairs lazily, so this
# cell only displays the iterator's repr, not the data.
anns.items()

<zip at 0x2133f414a40>

In [13]:
# Flatten the per-song annotation lists into one record per annotation,
# tagging each record with the id of the song it belongs to.
annotation_data = []
for song_id, song_annotations in anns.items():
    # Skip songs whose annotation value is not a list
    # (presumably missing annotations load as NaN/None — TODO confirm).
    if not isinstance(song_annotations, list):
        continue
    for annotation in song_annotations:
        # Build a new dict instead of writing into the original: the dicts
        # are the same objects held by df, so adding 'Song ID' in place
        # would leak into the source frame.
        annotation_data.append({**annotation, 'Song ID': song_id})

In [14]:
# One row per annotation record. from_records is a classmethod, so call it on
# the class instead of on a throwaway empty DataFrame instance.
df2 = pd.DataFrame.from_records(annotation_data)
df2.head()

Unnamed: 0,Line,Metaphore,Object-sinhala,Object-english,Subject-sinhala,Subject-english,Song ID
0,6,nuvan daaley,nuvan,eye,induwara,sun,1
1,2,adaraya sundara varadaki,adaraya,love,sundara varadaki,mistake,1
2,14,hada svara,hada,heart,svara,notes,1
3,12,veenaa bandin,veena,harps,bandin,hip,1
4,2,Aadaraye ulpatha wu amma,Aadaraye ulpatha wu,source of love,amma,mother,2


In [18]:
# Export the song-level columns only. Drop the nested 'Annotation' column by
# name rather than by position, so the export stays correct if the column
# order of df ever changes.
df.drop(columns='Annotation').to_csv('../data/song_data.csv', index=False, encoding='utf-8')

In [16]:
# Export the flattened annotation records (one row per annotation) without
# the row index.
df2.to_csv('../data/annotation_data.csv', index=False)

In [19]:
# Read the exported song CSV back and preview it (sanity check of the export).
ss_df = pd.read_csv('../data/song_data.csv')
ss_df.head()

Unnamed: 0,id,title,topic,Lyrics in english,Lyrics in sinhala,Singer,Music,Lyrics
0,1,Aadaraya sundara varadaki,love life,Aadaraya sundara varadaki\nkisidaa samaavak na...,ආදරය සුන්දර වරදකි\nකිසිදා සමාවක්‌ නම් නැති\nමට...,Karunarathna Diwlgane,Gunadasa Kapuge,Yamuna Malini Perera
1,2,Aadaraye ulpatha wu amma,motherhood,Aadaraye ulpatha wu amma\nMaa obage puthu wu……...,ආදරයේ උල්පත වූ අම්මා\nමා ඔබගේ පුතු වූ..\nඔබ මත...,Victor Rathnayake,Victor Rathnayake,Premakeerthi De Alwis
2,3,Aalen wela ganna kolam kala,staring or blooming love,Aalen wela ganna kolam kala\nHitha aalen muda ...,ආලෙන් වෙලා ගන්න කෝලම් කලා\nහිත ආලෙන් මුදා ගන්න...,Athula Adikari With Samitha Mudunkotuwa,Madawa Hewawasam,Madawa Hewawasam
3,4,Ada dee geka giya,broken love & sorrow,Ada dee geka giya nuba mata Bulath athak deela...,අද දීගෙක ගිය නුඹ මට බුලත් අතක් දීලා\nයන්න පිටත...,Sunil Edirisinghe,Madawa Hewawasam,Madawa Hewawasam
4,5,Adara mal wala,staring or blooming love,Adara mal wala… pata kiyannada\nEwage lassana…...,ආදර මල් වල පාට කියන්නද\nමේ වගෙ ලස්සන තව නෑ\nමේ...,Kasun Kalhara,Nawarathna Gamage,Wasantha Kumara Kobawaka
