# Import Libraries


In [58]:
import pandas as pd # data processing
import numpy as np # Mathematical operations
import seaborn as sns # Data visualization
import matplotlib.pyplot as plt # Data visualization
import moviepy.editor as mpy # Video and audio editing
import requests # HTTP requests
import os # File operations
import sys # System operations
import boto3 # AWS SDK
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from pydub import AudioSegment # Audio processing

# Media Files

    A. Issues when loading the video: 

                1. When previewing the video it has a mixer issue - Will skip  this for now till fix is found, will convert the file and store to bucket
                2. Where do we store the temporary files - Files will be stored in a bucket on S3 on AWS

**File convertion**

In [None]:
file_path = "C:/Users/tedjo/Downloads/dancevibedaily/media/"
save_path = "C:/Users/tedjo/Downloads/dancevibedaily/media/audio/"

#Loop through each file within the directory
for file in os.listdir(file_path):
    if file.endswith(".mp4"):
        video_path = os.path.join(file_path, file)

        #Load video file
        try:
            video = mpy.VideoFileClip(video_path)
            print(f'Video Loaded: {video_path}')
            audio = video.audio
            audio.write_audiofile(os.path.join(save_path,f'{file}.mp3'))
            
            # #Access AWS S3 Bucket
            # aws_access_key_id = ''
            # aws_secret_access_key = ''

            # s3 = boto3.resource('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)

            # with open(os.path.join(file_path,f'{file}.mp3'), 'rb') as data:
            #     s3.Bucket('dancevibedaily').put_object(Key=f'{file}.mp3', Body=data)
            #     print(f'Audio file uploaded to S3: {file}.mp3')

            # #Remove audio file from local storage
            # os.remove(os.path.join(file_path,f'{file}.mp3'))
            # print("Audio file removed from local storage")

        except Exception as e:
            print(f"Error loading video: {e}")
            continue

# Data Building and Cleaning

**Issues on API use with bucket**

                1. As I am new to Data Engineering I have not figured out how to work with api for now will work with local directory.
                2. There is a 'NoneType' object is not subscriptable, will go through these json and get to understand the issue.

**Audd API**

In [27]:
import requests # HTTPS Requests
data = {
    'api_token': '',
    'return': 'musicbrainz',
}

# Will loop through the files in the directory and extract the song title, artist, and album from the API response
save_path = "C:/Users/tedjo/Downloads/dancevibedaily/media/audio/"
results = []

for i in os.listdir(save_path):
    if i.endswith(".mp3"):
        files = {
            'file': open(os.path.join(save_path,i),'rb') #reading binary file
        }
        try:
            result = requests.post('https://api.audd.io/', data=data, files=files) # Send data to the API 
            if result.json()['status'] == 'error':
                print(f"Error processing file: {result.json()}")
                continue
            print(result.json()['result']['title'])
            results.append({
                'Title': result.json()['result']['title'],
                'Artist': result.json()['result']['artist'],
                'Album': result.json()['result']['album'],
                'Release Date': result.json()['result']['release_date'],
            })# Append the results to the list, which will be used to create a DataFrame
        except Exception as e:
            print(f"Error processing file: {e}")
            continue

df = pd.DataFrame(results) # Create a DataFrame from the results list


All Over You
ULAZI Infinity MusiQ (yey)
Tjina
YESHUA (dance cruise)
Private school
Error processing file: 'NoneType' object is not subscriptable
Look at Her
Killin Dem
Error processing file: 'NoneType' object is not subscriptable
Escapism. (Sped Up)
Error processing file: 'NoneType' object is not subscriptable
Error processing file: 'NoneType' object is not subscriptable
Jinja
Dalie (feat. Baby S.O.N)
soso
الي اين انا ذاهب
Water
Issa Necessary (feat. Kiss Daniel)
Mnike (UK Radio Edit)
Barbie World (with Aqua) [From Barbie The Album]
UNAVAILABLE (feat. Musa Keys)
Look at Her
Hwiralang feat. Siko Wa Mmino,Hlogi Mash
dabala
Mnike
Cough
Error processing file: 'NoneType' object is not subscriptable
Water
Venda Boy (feat. fefe laku dura)
AH TXE TXE (feat. TYSON)
Floor Seats
Error processing file: 'NoneType' object is not subscriptable
Rock Attack
Walking dead
Kafe'
Shu! (feat. Chley)
Error processing file: 'NoneType' object is not subscriptable
Mnike
Get Up (Main Version)
Lost In The Bass. M

In [29]:
df.to_csv('C:/Users/tedjo/Downloads/dancevibedaily/media/audio/audio_metadata.csv', index=False) # Save the DataFrame to a CSV file

**Spotify API**

In [65]:
client_id = ''# Spotify Client Token
client_secret = ''  # Spotify Secret Token
auth_manager = SpotifyClientCredentials(client_id = client_id, client_secret=client_secret)# Initialize the Spotify Client Credentials
sp = spotipy.Spotify(auth_manager=auth_manager)# Initialize the Spotify API client

# #New Columns
# df['Spotify Track ID'] ='' # Track ID for other analysis on tracks
# df['Spotify Track Name'] = ''# Track Name
# df['Sporify Album'] = '' # Album Name
# df['Spotify Arist'] = '' # Artists Name
# df['Spotify Track URL'] = '' # Track URL
# df['Spotify Track Genre'] = ''#Track Genre
# df['Spotify Image Path'] = ''#Image Path
# df['Spotify Track Popularity'] = ''#How popular is this track from 0 to 100

#Function to get details
def get_details(track_name,artist_name,album_name):
    query = f'track: {track_name} artist: {artist_name} album: {album_name}'
    result  = sp.search(q= query, type= 'track',limit=1 )
    if result['tracks']['items']:
        track = result['tracks']['items'][0]
        artist_id = track['artists'][0]['id']
        artist = sp.artist(artist_id)
        return{
            'Spotify Track ID': track['id'],
            'Spotify Track Name': track['name'],
            'Spotify Artist': ', '.join([artist['name'] for artist in track['artists']]),
            'Spotify Album': track['album']['name'],
            'Spotify URL': track['external_urls']['spotify'],
            'Spotify Image URL': track['album']['images'][0]['url'] if track['album']['images'] else None,
            'Spotify Track Popularity': track['popularity'],
            'Spotify Track Genre': ', '.join(artist['genres'])  if artist['genres'] else None
        }
    else:
        return None

for index, row in df.iterrows():
    details = get_details(row['Title'],row['Artist'],row['Album'])
    print(f"Processing {index+1} of {len(df)}")
    print(details)
    if details:
        df.at[index, 'Spotify Track ID'] = details['Spotify Track ID']
        df.at[index, 'Spotify Track Name'] = details['Spotify Track Name']
        df.at[index, 'Spotify Artist'] = details['Spotify Artist']
        df.at[index, 'Spotify Album'] = details['Spotify Album']
        df.at[index, 'Spotify Track URL'] = details['Spotify URL']
        df.at[index, 'Spotify Image URL'] = details['Spotify Image URL']
        df.at[index, 'Spotify Track Popularity'] = details['Spotify Track Popularity']
        df.at[index, 'Spotify Track Genre'] = details['Spotify Track Genre']


Processing 1 of 195
{'Spotify Track ID': '75Q69chmd8CEZbVsA4CDMm', 'Spotify Track Name': 'Gorgeous', 'Spotify Artist': 'mansionz', 'Spotify Album': 'Mansionz', 'Spotify URL': 'https://open.spotify.com/track/75Q69chmd8CEZbVsA4CDMm', 'Spotify Image URL': 'https://i.scdn.co/image/ab67616d0000b27390d0e1eb630b1595d23a1a0e', 'Spotify Track Popularity': 56, 'Spotify Track Genre': 'indie pop rap'}
Processing 2 of 195
{'Spotify Track ID': '4k2yKSDE5cSUED46GFbSdd', 'Spotify Track Name': 'ULAZI Infinity MusiQ (yey)', 'Spotify Artist': 'uLazi', 'Spotify Album': 'ULAZI Infinity MusiQ (yey)', 'Spotify URL': 'https://open.spotify.com/track/4k2yKSDE5cSUED46GFbSdd', 'Spotify Image URL': 'https://i.scdn.co/image/ab67616d0000b2739a1373a5bb0ce5db2c080155', 'Spotify Track Popularity': 28, 'Spotify Track Genre': 'sgija'}
Processing 3 of 195
{'Spotify Track ID': '4DO34AAYHCCF2gs7jxLFYx', 'Spotify Track Name': 'Tjina', 'Spotify Artist': 'Megadrumz, Lady Du', 'Spotify Album': 'For Your Soul', 'Spotify URL': 'h

In [67]:
df

Unnamed: 0,Title,Artist,Album,Release Date,Spotify Track ID,Spotify Track Name,Sporify Album,Spotify Arist,Spotify Track URL,Spotify Track Genre,Spotify Image Path,Spotify Track Popularity,Spotify Artist,Spotify Album,Spotify URL,Spotify Image URL,Track Popularity
0,All Over You,Sped Up 204,GORGEOUS,2023-09-08,75Q69chmd8CEZbVsA4CDMm,Gorgeous,,,https://open.spotify.com/track/75Q69chmd8CEZbV...,indie pop rap,,56,mansionz,Mansionz,https://open.spotify.com/track/75Q69chmd8CEZbV...,https://i.scdn.co/image/ab67616d0000b27390d0e1...,56.0
1,ULAZI Infinity MusiQ (yey),uLazi,ULAZI Infinity MusiQ (yey),2022-12-01,4k2yKSDE5cSUED46GFbSdd,ULAZI Infinity MusiQ (yey),,,https://open.spotify.com/track/4k2yKSDE5cSUED4...,sgija,,28,uLazi,ULAZI Infinity MusiQ (yey),https://open.spotify.com/track/4k2yKSDE5cSUED4...,https://i.scdn.co/image/ab67616d0000b2739a1373...,28.0
2,Tjina,Megadrumz/Lady Du,For Your Soul (Extended Edition),2023-09-01,4DO34AAYHCCF2gs7jxLFYx,Tjina,,,https://open.spotify.com/track/4DO34AAYHCCF2gs...,,,47,"Megadrumz, Lady Du",For Your Soul,https://open.spotify.com/track/4DO34AAYHCCF2gs...,https://i.scdn.co/image/ab67616d0000b2735e769a...,47.0
3,YESHUA (dance cruise),Dj Bentoa,YESHUA (dance cruise),2023-04-22,2EIVvLv9sXdNB5KI0KcJ43,YESHUA - dance cruise,,,https://open.spotify.com/track/2EIVvLv9sXdNB5K...,,,45,Dj Bentoa,YESHUA (dance cruise),https://open.spotify.com/track/2EIVvLv9sXdNB5K...,https://i.scdn.co/image/ab67616d0000b273dcc7d8...,45.0
4,Private school,Juicy Kay Dee,Private school,2021-02-18,4wi3QSScNkYsNjv5rFTDvl,Juicey Ahhh,,,https://open.spotify.com/track/4wi3QSScNkYsNjv...,milwaukee hip hop,,48,J.P.,Juicey Ahhh,https://open.spotify.com/track/4wi3QSScNkYsNjv...,https://i.scdn.co/image/ab67616d0000b273f83c19...,48.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,Polémique,Cino Black,Polémique,2023-11-27,64JSYA1u4PsbERSftIgh17,Polémique,,,https://open.spotify.com/track/64JSYA1u4PsbERS...,rap congolais,,31,"Cino Black, Zaparo De Guerre",Polémique,https://open.spotify.com/track/64JSYA1u4PsbERS...,https://i.scdn.co/image/ab67616d0000b273206488...,31.0
191,FEEL,Davido,Timeless,2023-03-20,1mk8ZC9OeTZMr8Wy31LqRj,FEEL,,,https://open.spotify.com/track/1mk8ZC9OeTZMr8W...,"afrobeats, afropop, azontobeats, ghanaian hip ...",,68,Davido,Timeless,https://open.spotify.com/track/1mk8ZC9OeTZMr8W...,https://i.scdn.co/image/ab67616d0000b273adfc1a...,68.0
192,Automne Gold,DRS,Automne Gold,2023-08-12,0a057xgkRUb7JkaGiBdRn6,Automne Gold,,,https://open.spotify.com/track/0a057xgkRUb7Jka...,,,27,Drs,Automne Gold,https://open.spotify.com/track/0a057xgkRUb7Jka...,https://i.scdn.co/image/ab67616d0000b273d11998...,27.0
193,Dalie (feat. Baby S.O.N),Kamo Mphela,Dalie (feat. Baby S.O.N),2023-10-11,4URabg9AGHasjFEVdTbWcC,Dalie (feat. Baby S.O.N),,,https://open.spotify.com/track/4URabg9AGHasjFE...,amapiano,,62,"Kamo Mphela, Tyler ICU, Khalil Harrison, Baby ...",Dalie (feat. Baby S.O.N),https://open.spotify.com/track/4URabg9AGHasjFE...,https://i.scdn.co/image/ab67616d0000b273c6ef6a...,62.0


# Data Cleaning

In [None]:
import random
while True:
    def create_account(username = '', account_details='', balance = 0):
        username = str(input("Welcome Please enter name: "))
        account_details = [random.choice([1,2,3,4,5,6,7,8,9] for _ in range(8))]
        account_details = ''.join(map(str,account_details))
        balance  = balance

In [43]:
def create_account(username = '', account_details='', balance = 0):
        username = str(input("Welcome Please enter name: "))
        account_details = [random.choice([1,2,3,4,5,6,7,8,9]) for _ in range(8)]
        account_details = ''.join(map(str,account_details))
        balance  = balance
        print("Account has been created successfully")
        print(f'Name:{username},\n Account Number: {account_details},\n Balance: {balance}')
        return username, account_details, balance

ted = create_account()

#create function view details which will display the account details from the account created in function create_account
def view_details():
    print(f'Name:{ted[0]},\n Account Number: {ted[1]},\n Balance: {ted[2]}')

def deposit_cash(account_number):
    if account_number == ted[1]:
        cash = int(input("How much would you like to deposit today: "))
        prev_balance = ted[2]
        new_balance =  cash + prev_balance
        print(f'Cash Deposited: {cash}, \n Previous Balance: {prev_balance}, \n New Balance: {new_balance}')
    else:
        print('Account number invalid')

def withdraw_cash(account_number):
    


Account has been created successfully
Name:8000,
 Account Number: 18484224,
 Balance: 0
Name:8000,
 Account Number: 18484224,
 Balance: 0


<function __main__.deposit_cash(account_number)>

In [54]:
def deposit_cash(account_number):
    if account_number == ted[1]:
        cash = int(input("How much would you like to deposit today: "))
        prev_balance = ted[2]
        new_balance =  cash + prev_balance
        print(f'Cash Deposited: {cash}, \n Previous Balance: {prev_balance}, \n New Balance: {new_balance}')
    else:
        print('Account number invalid')
deposit_cash('18484224')

Cash Deposited: 8000, 
 Previous Balance: 0, 
 New Balance: 8000
