#### Song downloader: scrape movie pages for song links and download the audio files

In [2]:
import requests
from bs4 import BeautifulSoup
import os

import pandas as pd
import re

In [4]:
# Load the raw list of movie anchors; the single column is named
# 'movies_urls' because the text file carries no header row of its own.
movies_df = pd.read_csv(
    'movies_list_allu_arjun.txt',
    names=['movies_urls'],
)
movies_df.head()

Unnamed: 0,movies_urls
0,"<a href=""https://sensongsmp3.tv/pushpa-songs-f..."
1,"<a href=""https://sensongsmp3.tv/ala-vaikuntapu..."
2,"<a href=""https://sensongsmp3.tv/allu-arjun/"" t..."
3,"<a href=""https://sensongsmp3.tv/naa-peru-surya..."
4,"<a href=""https://sensongsmp3.tv/en-peyar-surya..."


### Data preprocessing
----

In [149]:
def title_separate(x):
    '''Extract the value of the ``title`` attribute from raw anchor text.

    Parameters
    ----------
    x : str
        Raw text such as ``<a href="..." title="Movie Name">...</a>``.

    Returns
    -------
    str or None
        The text between the double quotes of the first ``title="..."``
        attribute (possibly an empty string), or ``None`` when ``x`` is not a
        string or contains no such attribute.
    '''
    # Non-string cells (e.g. a missing value) cannot be searched.
    if not isinstance(x, str):
        return None
    # [^"]* stops at the closing quote of the title attribute itself. A greedy
    # .* would run on to the last quote in the string and drag any following
    # attribute (rel="...", class="...") into the result.
    match = re.search(r'\btitle\s*=\s*"([^"]*)"', x)
    return match.group(1) if match else None

In [150]:
def url_separate(x):
    '''Pull the first URL out of a piece of raw text.

    Returns the first substring of ``x`` matched by the URL pattern below, or
    ``None`` when nothing matches or ``x`` cannot be searched (for example,
    it is not a string).
    '''
    url_pattern = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
    try:
        # Group 1 is the outermost group, i.e. the complete URL.
        first_hit = re.search(url_pattern, x)
        return first_hit.group(1)
    except Exception:
        # No match (first_hit is None) or unsearchable input.
        return None

In [151]:
# Derive two columns from the raw anchor text: the movie page URL
# (url_links) and the movie title (movie_name).
movies_df = movies_df.assign(
    url_links=movies_df['movies_urls'].apply(url_separate),
    movie_name=movies_df['movies_urls'].apply(title_separate),
)
movies_df.head()

Unnamed: 0,movies_urls,url_links,movie_name
0,"<a href=""https://sensongsmp3.tv/telugump3s19/""...",https://sensongsmp3.tv/telugump3s19/,
1,"<a href=""https://sensongsmp3.tv/bheemla-nayak-...",https://sensongsmp3.tv/bheemla-nayak-2022-song...,Bheemla Nayak (2022)
2,"<a href=""https://sensongsmp3.tv/vakeel-saab-so...",https://sensongsmp3.tv/vakeel-saab-songs-downl...,Vakeel Saab (2021)
3,"<a href=""https://sensongsmp3.tv/hari-hara-veer...",https://sensongsmp3.tv/hari-hara-veera-mallu-2...,Hari Hara Veera Mallu (2022)
4,"<a href=""https://sensongsmp3.tv/pada-pada-song...",https://sensongsmp3.tv/pada-pada-song/,Pada Pada Song


In [152]:
def movie_page_get(url, timeout=30):
    '''
    Fetch a movie page and collect the song URLs linked from it.

    Every ``<a rel="nofollow">`` inside the page's
    ``<div class="entry-content">`` element is converted to a URL with
    ``url_separate``.

    Parameters
    ----------
    url : str
        Address of the movie page.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    list of str
        The extracted URLs in page order. An empty list is returned when
        ``url`` is not a string, the page cannot be fetched, or it has no
        ``entry-content`` section, so callers can always take ``len()`` of
        the result.
    '''
    # Rows where no URL could be extracted hold None rather than a string.
    if not isinstance(url, str):
        return []

    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
        # Do not scrape links out of an HTTP error page.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Could not fetch movie page {url}: {exc}")
        return []

    soup = BeautifulSoup(response.text, "lxml")
    song_area = soup.find("div", attrs={"class": "entry-content"})
    if song_area is None:
        print(f"No entry-content section found on {url}")
        return []

    required_links = song_area.find_all("a", attrs={"rel": "nofollow"})
    extracted = (url_separate(str(link)) for link in required_links)
    # url_separate returns None for anchors without a recognisable URL;
    # drop those so every entry in the result is a usable URL.
    return [link_url for link_url in extracted if link_url is not None]
    

In [160]:
# Helper: derive a song's file name from its URL.
def get_song_name(song_url):
    '''Return the part of ``song_url`` that follows its last ``/``.

    The whole string is returned when it contains no ``/``; ``None`` is
    returned when ``song_url`` cannot be split (for example, it is ``None``).
    '''
    try:
        *_, last_segment = song_url.split('/')
    except Exception:
        return None
    return last_segment

In [154]:
def song_download(song_urls, dest_dir='songs', timeout=60):
    '''Download every song in ``song_urls`` into ``dest_dir``.

    Each URL is handled independently: a failed request or write is reported
    with ``print`` and the loop moves on to the next URL, so one bad link does
    not stop the remaining downloads.

    Parameters
    ----------
    song_urls : iterable of str
        Song URLs to fetch. A non-iterable value (e.g. ``None``) is treated
        as "nothing to download".
    dest_dir : str, optional
        Directory the files are written to; created if missing
        (default ``'songs'``).
    timeout : float, optional
        Seconds to wait for the server on each request (default 60).

    Returns
    -------
    None
    '''
    try:
        urls = list(song_urls)
    except TypeError:
        # Not iterable (e.g. None): nothing to download.
        return
    if not urls:
        return

    # open() does not create missing directories; make sure the target exists.
    os.makedirs(dest_dir, exist_ok=True)

    for song_url in urls:
        file_name = get_song_name(song_url)
        if not file_name:
            # No usable name (None URL, or URL ending in '/').
            print(f"Skipping URL without a file name: {song_url}")
            continue
        try:
            # Without a timeout, requests waits indefinitely on a stalled server.
            download = requests.get(song_url, timeout=timeout)
            if download.status_code != 200:
                print(f"Download Failed For File {file_name}")
                continue
            with open(os.path.join(dest_dir, file_name), 'wb') as f:
                f.write(download.content)
        except (requests.RequestException, OSError) as exc:
            # Network or filesystem problem for this song only; keep going.
            print(f"Download Failed For File {file_name}: {exc}")

In [155]:
# Scrape each movie page for its song links. The scraper defined in this
# notebook is movie_page_get; the name song_page_get is not defined anywhere
# in it, so calling it raises NameError on a fresh kernel.
movies_df['song_urls'] = movies_df['url_links'].apply(movie_page_get)
movies_df.head()

Unnamed: 0,movies_urls,url_links,movie_name,song_urls
0,"<a href=""https://sensongsmp3.tv/telugump3s19/""...",https://sensongsmp3.tv/telugump3s19/,,[https://sensongsmp3.la]
1,"<a href=""https://sensongsmp3.tv/bheemla-nayak-...",https://sensongsmp3.tv/bheemla-nayak-2022-song...,Bheemla Nayak (2022),[https://mp3teluguwap.net/mp3/2021/Bheemla%20N...
2,"<a href=""https://sensongsmp3.tv/vakeel-saab-so...",https://sensongsmp3.tv/vakeel-saab-songs-downl...,Vakeel Saab (2021),[https://mp3teluguwap.net/mp3/2021/Vakeel%20Sa...
3,"<a href=""https://sensongsmp3.tv/hari-hara-veer...",https://sensongsmp3.tv/hari-hara-veera-mallu-2...,Hari Hara Veera Mallu (2022),[https://mp3teluguwap.net/Teaser/Hari%20Hara%2...
4,"<a href=""https://sensongsmp3.tv/pada-pada-song...",https://sensongsmp3.tv/pada-pada-song/,Pada Pada Song,[https://mp3teluguwap.net/mp3/2018/Janasena/Pa...


In [156]:
# Count the song links found per movie. A row that does not hold a list
# (e.g. None after a failed scrape) counts as zero instead of making len()
# raise TypeError.
movies_df['total_songs'] = movies_df['song_urls'].apply(
    lambda urls: len(urls) if isinstance(urls, (list, tuple)) else 0
)
movies_df.head()

Unnamed: 0,movies_urls,url_links,movie_name,song_urls,total_songs
0,"<a href=""https://sensongsmp3.tv/telugump3s19/""...",https://sensongsmp3.tv/telugump3s19/,,[https://sensongsmp3.la],1
1,"<a href=""https://sensongsmp3.tv/bheemla-nayak-...",https://sensongsmp3.tv/bheemla-nayak-2022-song...,Bheemla Nayak (2022),[https://mp3teluguwap.net/mp3/2021/Bheemla%20N...,18
2,"<a href=""https://sensongsmp3.tv/vakeel-saab-so...",https://sensongsmp3.tv/vakeel-saab-songs-downl...,Vakeel Saab (2021),[https://mp3teluguwap.net/mp3/2021/Vakeel%20Sa...,10
3,"<a href=""https://sensongsmp3.tv/hari-hara-veer...",https://sensongsmp3.tv/hari-hara-veera-mallu-2...,Hari Hara Veera Mallu (2022),[https://mp3teluguwap.net/Teaser/Hari%20Hara%2...,2
4,"<a href=""https://sensongsmp3.tv/pada-pada-song...",https://sensongsmp3.tv/pada-pada-song/,Pada Pada Song,[https://mp3teluguwap.net/mp3/2018/Janasena/Pa...,1


In [157]:
# Summary statistics (count, mean, std, quartiles, min/max) of the number of
# song links found per movie.
movies_df['total_songs'].describe()

count    30.000000
mean      6.133333
std       3.683452
min       0.000000
25%       5.250000
50%       6.000000
75%       7.000000
max      18.000000
Name: total_songs, dtype: float64

In [None]:
# Download the songs of every movie. A plain loop is used because
# song_download is called only for its side effects; Series.apply would
# build and display a Series of None values.
for movie_song_urls in movies_df['song_urls']:
    song_download(movie_song_urls)