In [None]:
import requests
import pandas as pd
from requests_html import HTML
from requests_html import HTMLSession

In [None]:
# !pip install requests-html

In [None]:
def get_source(url):
    """Fetch a URL through a fresh requests_html session.

    Args:
        url (string): Address of the page to download.

    Returns:
        response (object): HTTP response object from requests_html, or None
            when the request raised a requests exception (the error is printed).
    """

    try:
        return HTMLSession().get(url)
    except requests.exceptions.RequestException as err:
        # Best-effort: report the failure and hand back None to the caller.
        print(err)
        return None

In [None]:
def get_feed(url):
    """Return a Pandas dataframe containing the RSS feed contents.

    Every <item> element of the feed becomes one row. The rows are collected
    in a list and the dataframe is built once, because DataFrame.append was
    removed in pandas 2.0 and re-copied the whole frame on every call.

    Args:
        url (string): URL of the RSS feed to read.

    Returns:
        df (dataframe): Pandas dataframe with the columns 'title', 'pubDate',
            'guid' and 'description'. It is empty when the request failed or
            the feed has no items; a tag missing from an item yields None.
    """

    columns = ['title', 'pubDate', 'guid', 'description']

    response = get_source(url)
    if response is None:
        # get_source() prints the error and returns None on a failed request.
        return pd.DataFrame(columns=columns)

    def _text(item, tag):
        # Text of the first matching child tag, or None when the tag is absent.
        node = item.find(tag, first=True)
        return node.text if node is not None else None

    with response as r:
        # The column names double as the tag names looked up inside each item.
        records = [
            {column: _text(item, column) for column in columns}
            for item in r.html.find("item", first=False)
        ]

    return pd.DataFrame(records, columns=columns)

In [None]:
# Address of the XML feed that is passed to get_feed() below.
url = 'https://cdn.marschannels.com/files/feed/omjwr7pvui.xml'


In [None]:
# Download the feed and parse its items into a dataframe (one row per item).
df = get_feed(url)

In [None]:
# Preview the first rows of the parsed feed.
df.head()

Unnamed: 0,title,pubDate,guid,description
0,Beyond The Usual - Quintana,2022-06-06T01:07:05.000Z,27069063,Beyond The Usual - Quintana
1,The Harriet Tubman Story,2022-06-05T00:31:22.000Z,27069064,Does God answer prayers? Every day Harriet Tub...
2,Benevolence,2022-06-02T13:48:51.000Z,27069065,"Ruth, a young woman is tenacious to hold on to..."
3,Daddy We're Back - Episode 4,2022-06-06T12:21:54.000Z,27069066,Daddy We're Back - Episode 4
4,Hercules Unchained,2022-06-03T22:54:38.000Z,27069067,"The Greek muscleman fights leopards, a giant a..."


In [None]:
# Number of feed items that were parsed.
len(df)

1304

In [None]:
# Persist the parsed feed. The row index is written too (no index=False), so it
# comes back as an 'Unnamed: 0' column when the CSV is read again further down.
df.to_csv('/content/drive/MyDrive/castify/search_feed.csv')

Merging the two datasets

In [None]:
import pandas as pd

# Locations of the saved feed and of the duration table.
search_feed_path = '/content/drive/MyDrive/castify/search_feed.csv'
duration_path = '/content/drive/MyDrive/castify/duration.csv'

# Load both tables; the feed CSV was saved with its index, which comes back as
# the 'Unnamed: 0' column and is dropped here.
search_feed = pd.read_csv(search_feed_path).drop(columns='Unnamed: 0')
duration = pd.read_csv(duration_path)

In [None]:
# Give both tables a 1-based row number so they can be joined row by row.
search_feed['id'] = search_feed.index + 1
duration['id'] = duration.index + 1

# Join on that key, then discard the columns that are not exported.
search_feeds = search_feed.merge(duration, on="id").drop(columns=['pubDate', 'id'])

# Positional rename of the remaining columns to the export names.
search_feeds.columns = ['title','video_id','description','duration(secs)']

# Write the merged table without the row index.
search_feeds.to_excel('/content/drive/MyDrive/castify/search_feed_duration.xlsx',index=False)

In [None]:
# Read the exported workbook back to check what was written.
search_feeds = pd.read_excel('/content/drive/MyDrive/castify/search_feed_duration.xlsx')

search_feeds.head()

Unnamed: 0,title,video_id,description,duration(secs)
0,Beyond The Usual - Quintana,27069063,Beyond The Usual - Quintana,725
1,The Harriet Tubman Story,27069064,Does God answer prayers? Every day Harriet Tub...,1810
2,Benevolence,27069065,"Ruth, a young woman is tenacious to hold on to...",7030
3,Daddy We're Back - Episode 4,27069066,Daddy We're Back - Episode 4,1326
4,Hercules Unchained,27069067,"The Greek muscleman fights leopards, a giant a...",5383


### Extra analysis 

In [None]:
# Add a 1-based row number to the feed table; it is the join key used below.
search_feed['id'] = search_feed.index + 1

# search_feed = search_feed.set_index('id')

# Show the first two rows to confirm the new column.
search_feed.head(2)

Unnamed: 0,title,pubDate,guid,description,id
0,Beyond The Usual - Quintana,2022-06-06T01:07:05.000Z,27069063,Beyond The Usual - Quintana,1
1,The Harriet Tubman Story,2022-06-05T00:31:22.000Z,27069064,Does God answer prayers? Every day Harriet Tub...,2


In [None]:
# Add the same 1-based row number to the duration table.
duration['id'] = duration.index + 1

# duration = duration.set_index('id')

# Show the first two rows to confirm the new column.
duration.head(2)

Unnamed: 0,duration,id
0,725,1
1,1810,2


In [None]:
# Join the feed and duration tables on the row-number key 'id'
# (pd.merge defaults to an inner join, so only ids present in both are kept).
search_feeds = pd.merge(search_feed, duration, on="id")

search_feeds

In [None]:
# Replace the in-memory merge result with the contents of the saved workbook.
# NOTE(review): the recorded output of this cell shows the columns
# 'Unnamed: 0', 'title', 'guid', 'description', 'duration', which differs from
# the schema written by the export cell above (index=False, renamed columns).
# This looks like a stale file or out-of-order execution; confirm.
search_feeds = pd.read_excel('/content/drive/MyDrive/castify/search_feed_duration.xlsx')

search_feeds.head()

Unnamed: 0.1,Unnamed: 0,title,guid,description,duration
0,0,Beyond The Usual - Quintana,27069063,Beyond The Usual - Quintana,725
1,1,The Harriet Tubman Story,27069064,Does God answer prayers? Every day Harriet Tub...,1810
2,2,Benevolence,27069065,"Ruth, a young woman is tenacious to hold on to...",7030
3,3,Daddy We're Back - Episode 4,27069066,Daddy We're Back - Episode 4,1326
4,4,Hercules Unchained,27069067,"The Greek muscleman fights leopards, a giant a...",5383


In [None]:
# Rename by label instead of assigning a fixed-length list of names: a positional
# assignment raises a length-mismatch ValueError whenever the workbook does not
# have exactly five columns. Labels that are not present are simply ignored.
search_feeds = search_feeds.rename(
    columns={'Unnamed: 0': 'id', 'duration': 'duration(secs)'}
)

In [None]:
# Display the renamed table (pandas truncates the middle rows in the output).
search_feeds

Unnamed: 0,id,title,guid,description,duration(secs)
0,0,Beyond The Usual - Quintana,27069063,Beyond The Usual - Quintana,725
1,1,The Harriet Tubman Story,27069064,Does God answer prayers? Every day Harriet Tub...,1810
2,2,Benevolence,27069065,"Ruth, a young woman is tenacious to hold on to...",7030
3,3,Daddy We're Back - Episode 4,27069066,Daddy We're Back - Episode 4,1326
4,4,Hercules Unchained,27069067,"The Greek muscleman fights leopards, a giant a...",5383
...,...,...,...,...,...
1299,1299,Ponysitter's Club: Fun at the Fair,40018568,"Excited about the Fall Fair, the Ponysitters a...",4811
1300,1300,Christmas Recipe for Romance,40018586,The owner of a historic inn decides to enter a...,5293
1301,1301,Operation Christmas List,40035213,12-year-old Barney has a plan to buy the entir...,5299
1302,1302,Christmas Catch,40035600,Detective Bennett is on the trail of a thief a...,5204


In [None]:
# Inspect the last rows of the table.
search_feeds.tail()

Unnamed: 0,id,title,guid,description,duration(secs)
1299,1299,Ponysitter's Club: Fun at the Fair,40018568,"Excited about the Fall Fair, the Ponysitters a...",4811
1300,1300,Christmas Recipe for Romance,40018586,The owner of a historic inn decides to enter a...,5293
1301,1301,Operation Christmas List,40035213,12-year-old Barney has a plan to buy the entir...,5299
1302,1302,Christmas Catch,40035600,Detective Bennett is on the trail of a thief a...,5204
1303,1303,Time Out Sports Show Ep 01,40895595,"Ep 01 - November 22, 2022",3358


In [None]:
# index=False keeps the row index out of the workbook. Writing it would add an
# extra unnamed column on every save/reload cycle, and it matches how the merge
# cell above exports the same file.
search_feeds.to_excel('/content/drive/MyDrive/castify/search_feed_duration.xlsx', index=False)