In [1]:
import boto3
import pandas as pd
import numpy as np
from datetime import datetime
import os

In [2]:
# Get the DynamoDB service resource. No region or credentials are passed
# here, so boto3 falls back to its default session configuration.
dynamodb = boto3.resource('dynamodb')

In [3]:
# Load the song dataset; the path is relative to the working directory.
df = pd.read_csv("music.csv")

In [4]:
# Remove the "lyrics" column. Rebinding (instead of inplace=True) together
# with errors="ignore" keeps this cell re-runnable: an in-place drop raises
# KeyError on the second execution because the column is already gone.
df = df.drop(columns="lyrics", errors="ignore")

In [5]:
# Preview the first rows to check which columns remain after the drop.
df.head()

Unnamed: 0.1,Unnamed: 0,artist_name,track_name,release_date,genre,len,dating,violence,world/life,night/time,...,sadness,feelings,danceability,loudness,acousticness,instrumentalness,valence,energy,topic,age
0,0,mukesh,mohabbat bhi jhoothi,1950,pop,95,0.000598,0.063746,0.000598,0.000598,...,0.380299,0.117175,0.357739,0.454119,0.997992,0.901822,0.339448,0.13711,sadness,1.0
1,4,frankie laine,i believe,1950,pop,51,0.035537,0.096777,0.443435,0.001284,...,0.001284,0.001284,0.331745,0.64754,0.954819,2e-06,0.325021,0.26324,world/life,1.0
2,6,johnnie ray,cry,1950,pop,24,0.00277,0.00277,0.00277,0.00277,...,0.00277,0.225422,0.456298,0.585288,0.840361,0.0,0.351814,0.139112,music,1.0
3,10,pérez prado,patricia,1950,pop,54,0.048249,0.001548,0.001548,0.001548,...,0.225889,0.001548,0.686992,0.744404,0.083935,0.199393,0.77535,0.743736,romantic,1.0
4,12,giorgos papadopoulos,apopse eida oneiro,1950,pop,48,0.00135,0.00135,0.417772,0.00135,...,0.0688,0.00135,0.291671,0.646489,0.975904,0.000246,0.597073,0.394375,romantic,1.0


In [6]:
# Create the MusicSong table, keyed by artist name (partition key) and song
# name (sort key), both strings. create_table raises ResourceInUseException
# when the table already exists, so re-running this cell falls back to the
# existing table instead of failing.
try:
    table = dynamodb.create_table(
        TableName='MusicSong',
        KeySchema=[
            {'AttributeName': 'ArtistName', 'KeyType': 'HASH'},
            {'AttributeName': 'SongName', 'KeyType': 'RANGE'},
        ],
        AttributeDefinitions=[
            {'AttributeName': 'ArtistName', 'AttributeType': 'S'},
            {'AttributeName': 'SongName', 'AttributeType': 'S'},
        ],
        ProvisionedThroughput={
            'ReadCapacityUnits': 5,
            'WriteCapacityUnits': 5,
        },
    )
except dynamodb.meta.client.exceptions.ResourceInUseException:
    # Table was created by an earlier run; reuse it.
    table = dynamodb.Table('MusicSong')

# Wait until the table exists.
table.wait_until_exists()

In [7]:
# Look up the MusicSong table by name, replacing the `table` object that the
# create_table cell returned.
table = dynamodb.Table(name='MusicSong')

In [8]:
# Keep at most 1000 randomly chosen songs for the upload. A fixed random_state
# makes the sample (and every output derived from it) reproducible, and the
# min() guard avoids the ValueError that sample(1000) raises on a smaller frame.
df = df.sample(n=min(1000, len(df)), random_state=42).reset_index(drop=True)

In [9]:
# Upload the sampled songs to DynamoDB in batches.
# Columns are selected by name rather than by position, so the mapping does
# not depend on the column order of the CSV.
song_columns = ["artist_name", "track_name", "release_date", "genre", "len"]

# overwrite_by_pkeys de-duplicates items that share the table's primary key
# inside one batch; without it DynamoDB rejects a batch containing the same
# (ArtistName, SongName) pair twice.
with table.batch_writer(overwrite_by_pkeys=["ArtistName", "SongName"]) as batch:
    for artist, track, year, genre, length in df[song_columns].itertuples(index=False, name=None):
        batch.put_item(Item={
            'ArtistName': artist.title(),
            'SongName': track.title(),
            # NOTE: 'RelaseDate' (sic) is kept as-is because the reporting
            # cell further down reads items by exactly this key.
            'RelaseDate': int(year),
            'Genre': genre.title(),
            'Length': int(length),
            # Random rating from 1 to 5 (randint's upper bound is exclusive).
            'Rating': int(np.random.randint(1, 6)),
        })

In [10]:
import typing
import json
import boto3.dynamodb.conditions as conditions

In [11]:
from decimal import Decimal

class DecimalEncoder(json.JSONEncoder):
  """JSON encoder that can serialize ``decimal.Decimal`` values.

  Whole-number decimals are emitted as JSON integers and everything else as
  floats, so integer attributes (years, ratings, lengths) survive a
  dumps/loads round-trip as ``int`` instead of turning into e.g. ``1950.0``.
  """

  def default(self, obj):
    """Return a JSON-serializable stand-in for ``obj``.

    Raises:
      TypeError: for any non-Decimal object the base encoder cannot handle.
    """
    if isinstance(obj, Decimal):
      # is_finite() keeps NaN/Infinity away from int(), which cannot
      # represent them; they fall through to float() as before.
      if obj.is_finite() and obj == obj.to_integral_value():
        return int(obj)
      return float(obj)
    return super().default(obj)

In [45]:
def get_all_artist(key, eq) -> typing.List[typing.List[dict]]:
    """Query the module-level ``table`` for every item matching each value.

    Args:
        key: Name of the key attribute to match (e.g. ``"ArtistName"``).
        eq: A single string or a list/tuple/set of strings. Each value is
            title-cased before querying, matching how the upload cell stores
            artist and song names.

    Returns:
        One list of items per requested value, in the order the values were
        given (an empty list for a value with no matches).
    """
    values = list(eq) if isinstance(eq, (list, tuple, set)) else [eq]
    response = []
    for elem in values:
        query_kwargs = {
            "KeyConditionExpression": conditions.Key(key).eq(elem.title())
        }
        items = []
        # A single Query call returns at most one page of results; keep
        # following LastEvaluatedKey until DynamoDB stops returning it.
        while True:
            page = table.query(**query_kwargs)
            items.extend(page["Items"])
            last_key = page.get("LastEvaluatedKey")
            if not last_key:
                break
            query_kwargs["ExclusiveStartKey"] = last_key
        response.append(items)
    return response

# Fetch the stored songs of two artists; `dm` holds one list of items per
# requested artist.
dm = get_all_artist("ArtistName", ["Jimi Hendrix", "Metallica"])

In [46]:
# Round-trip the query result through JSON so Decimal values become plain
# Python numbers. `dm` is deliberately not rebound: overwriting it with its
# own JSON string made a second run of this cell double-encode the data.
rs = json.loads(json.dumps(dm, cls=DecimalEncoder))

In [47]:
# Report how old each returned song is. The current year is looked up once,
# and the stored release year is coerced to int because the JSON round-trip
# can hand it back as a float (which printed ages such as "55.0").
current_year = datetime.today().year
for artist_songs in rs:
    for song in artist_songs:
        # 'RelaseDate' (sic) is the attribute name the upload cell writes.
        age = current_year - int(song["RelaseDate"])
        print(f"{song['ArtistName']} - {song['SongName']}. This song have {age} years.")

Jimi Hendrix - Are You Experienced?. This song have 55.0 years.
Jimi Hendrix - Introduction (Live At Woodstock). This song have 23.0 years.
Metallica - Sabbra Cadabra. This song have 24.0 years.


In [28]:
# Show the first global secondary index of the table, if any. The attribute
# is falsy (None/empty) when no index is defined, so test it explicitly
# instead of using a bare `except`, which also hid unrelated failures such as
# credential or network errors behind the "no indexes" message.
gsi_list = table.global_secondary_indexes or []
if gsi_list:
    print(gsi_list[0]["IndexName"])
else:
    print("Table not have secondary indexes.")

Table not have secondary indexes.


In [32]:
def get_all_genre(key, eq, index_name) -> typing.Optional[dict]:
    """Query a secondary index of the module-level ``table`` for ``key == eq``.

    Args:
        key: Name of the index's key attribute to match (e.g. ``"Genre"``).
        eq: Value the key attribute must equal.
        index_name: Name of the secondary index to query.

    Returns:
        The raw query response (a dict with an ``"Items"`` list), or ``None``
        when DynamoDB rejects the request with a ValidationException, which
        is how a missing index is reported.

    Raises:
        ClientError: any other service error (credentials, throttling,
            missing table, ...) is re-raised instead of being mislabelled
            as a missing index.
    """
    client_error = table.meta.client.exceptions.ClientError
    try:
        return table.query(
            IndexName=index_name,
            KeyConditionExpression=conditions.Key(key).eq(eq),
        )
    except client_error as err:
        # NOTE(review): other request-validation failures share this error
        # code; they are reported with the same message.
        if err.response.get("Error", {}).get("Code") != "ValidationException":
            raise
        print(f"The table does not have the specified index: {index_name}")
        return None

# Query the "Genre-Rating-index" index for Blues songs. `dm` is the raw query
# response, or None when the query failed (a message is printed in that case).
dm = get_all_genre("Genre", "Blues", "Genre-Rating-index")

The table does not have the specified index: Genre-Rating-index


In [52]:
# Convert the genre query response to plain JSON types and keep its items.
# `dm` is not rebound (re-running the cell used to double-encode it), and a
# failed query (dm is None) yields an empty list instead of a TypeError.
rs = json.loads(json.dumps(dm, cls=DecimalEncoder))["Items"] if dm is not None else []

In [53]:
# Number of items returned by the genre query.
len(rs)

161