In [1]:
import requests
import json
import os

# Get books from the Google Books API v1

In [3]:
def get_books(api_key, query, max_results=5, timeout=10):
    """Search the Google Books API v1 ``volumes`` endpoint.

    Args:
        api_key: API key sent as the ``key`` query parameter.
        query: Search text sent as the ``q`` query parameter.
        max_results: Value sent as the ``maxResults`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook forever.

    Returns:
        The ``items`` list from the JSON response. An empty list is returned
        when the response has no ``items`` key, when the status code is not
        200, or when the request / JSON decoding fails.
    """
    base_url = "https://www.googleapis.com/books/v1/volumes"
    params = {
        'q': query,
        'maxResults': max_results,
        'key': api_key,
        'orderBy': 'relevance'
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve books: {response.status_code}")
            return []
        return response.json().get('items', [])
    except (requests.exceptions.RequestException, ValueError) as e:
        # Best effort: one failed query must not abort the whole scrape.
        # ValueError covers a body that is not valid JSON.
        print(f"Failed to retrieve books: {e}")
        return []

### Download the image

In [4]:
def download_image(url, folder, timeout=10):
    """Download a book thumbnail into ``folder``.

    The file name is the value of the first query-string parameter of ``url``
    (everything after the last ``=`` of the first ``&``-separated segment
    following ``?``), and the bytes are written to ``<folder>/<name>.jpg``.

    Args:
        url: Image URL to fetch.
        folder: Existing directory in which the image is stored.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``<folder>/<name>`` on success and ``''`` on any failure.
        NOTE(review): the returned path deliberately keeps the original
        contract of *omitting* the ``.jpg`` suffix that the file on disk has;
        confirm that consumers of ``image_path`` append it themselves.
    """
    try:
        # Using the response as a context manager releases the streamed
        # connection even when the body is not fully consumed.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download image: {url}")
                return ''

            img_name = os.path.basename(url.split("?")[-1].split("&")[0].split("=")[-1])
            img_path = os.path.join(folder, img_name)
            with open(img_path + ".jpg", 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
            return img_path
    except (requests.exceptions.RequestException, OSError) as e:
        # Best effort: a single broken thumbnail must not stop the scrape.
        print(f"Failed to download image: {url} ({e})")
        return ''

def extract_book_info(book, img_folder):
    """Flatten one Google Books volume into the record stored in ``books.json``.

    Only the ``volumeInfo`` section of ``book`` is read. When it carries an
    ``imageLinks.thumbnail`` URL the thumbnail is downloaded with
    ``download_image`` and the returned path is stored as ``image_path``;
    otherwise ``image_path`` is an empty string.

    Args:
        book: One element of the ``items`` list returned by the volumes search.
        img_folder: Directory handed to ``download_image``.

    Returns:
        A dict with the keys ``name``, ``genre``, ``description``,
        ``avgRating``, ``ratingsCount``, ``image_path``, ``authors``,
        ``publisher``, ``pageCount``, ``language`` and ``publishedDate``.
        Missing fields fall back to the placeholder defaults below.
    """
    volume_info = book.get('volumeInfo', {})

    # The previous version also read ``saleInfo``/``listPrice`` but never used
    # them; that dead lookup was removed.
    image_url = volume_info.get('imageLinks', {}).get('thumbnail', '')

    # Download image if image_url is available
    image_path = download_image(image_url, img_folder) if image_url else ''

    return {
        'name': volume_info.get('title', 'No title'),
        'genre': volume_info.get('categories', ['Uncategorized']),
        'description': volume_info.get('description', 'No description'),
        'avgRating': volume_info.get('averageRating', 0),
        'ratingsCount': volume_info.get('ratingsCount', 0),
        'image_path': image_path,

        'authors': volume_info.get('authors', ['Unknown author']),
        'publisher': volume_info.get('publisher', 'Unknown publisher'),
        'pageCount': volume_info.get('pageCount', 0),
        'language': volume_info.get('language', 'Unknown language'),
        'publishedDate': volume_info.get('publishedDate', 'Unknown date'),
    }

### Send the request and get data

In [5]:
def save_to_json(data, filename):
    """Write ``data`` to ``filename`` as JSON indented by four spaces.

    The file is opened in text write mode, so existing content is replaced.
    """
    encoder = json.JSONEncoder(indent=4)
    with open(filename, 'w') as out_file:
        # Stream the encoder's chunks straight into the file.
        for piece in encoder.iterencode(data):
            out_file.write(piece)

# The Google Books API key is read from the environment so that it is never
# stored in (or committed with) the notebook. Any key that was previously
# pasted into this cell should be treated as leaked and revoked.
api_key = os.environ.get('GOOGLE_BOOKS_API_KEY')
if not api_key:
    raise RuntimeError("Set the GOOGLE_BOOKS_API_KEY environment variable before running this cell")

# One search is issued per keyword ("Heath" was a typo for "Health").
queries = ["Adventure", "Classics", "Fantasy", "Fiction", "Horror", "Mystery", "Thriller", "Romance", "Suspense", "Young",
           "Biography", "CookBook", "Essay", "Poetry", "Art", "Business", "Computer", "Education", "Engineering", "Health",
           "Fitness", "Law", "Mathematics", "Medical", "Psychology", "Philosophy", "Social", "Sports", "Technology", "Drama",
           "LGBT", "Humor", "Economics", "Comics", "Graphic", "Hobbies", "Family", "Relationships", "Study", "Foreign",
           "Game", "Gardening", "Disciplines", "Music", "Nature", "Pet", "Travel", "Adult", "Aid", "Nonfiction"]  # Add more queries as needed
all_books = []
img_folder = "books"

# Create the images folder if it doesn't exist
os.makedirs(img_folder, exist_ok=True)

# Fetch up to 10 volumes per keyword and flatten each into a record.
for query in queries:
    books = get_books(api_key, query, 10)
    for book in books:
        book_info = extract_book_info(book, img_folder)
        all_books.append(book_info)

save_to_json(all_books, 'books.json')
print("Data saved to books.json")

Data saved to books.json


# Get CDs from the Discogs API

In [6]:
def get_discogs_cd_data(token, query, max_results=5, timeout=10):
    """Search the Discogs database for CD releases matching ``query``.

    Args:
        token: Discogs personal access token, sent in the ``Authorization``
            header as ``Discogs token=<token>``.
        query: Search text sent as the ``q`` query parameter.
        max_results: Value sent as ``per_page``; only page 1 is requested.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``results`` list from the JSON response. An empty list is returned
        when the response has no ``results`` key, when the status code is not
        200, or when the request / JSON decoding fails.
    """
    base_url = "https://api.discogs.com/database/search"
    headers = {
        'Authorization': f'Discogs token={token}'
    }
    params = {
        'q': query,
        'format': 'CD',
        'per_page': max_results,
        'page': 1
    }

    try:
        response = requests.get(base_url, headers=headers, params=params, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve CDs from Discogs: {response.status_code}")
            return []
        return response.json().get('results', [])
    except (requests.exceptions.RequestException, ValueError) as e:
        # Best effort: one failed query must not abort the whole scrape.
        # ValueError covers a body that is not valid JSON.
        print(f"Failed to retrieve CDs from Discogs: {e}")
        return []


### Get the track list and videos

In [7]:
def get_discogs_master_data(url, timeout=10):
    """Fetch the JSON document behind a Discogs ``master_url``/``resource_url``.

    Args:
        url: Absolute URL taken from a Discogs search result. A falsy value
            (``''`` or ``None``) is accepted and yields an empty dict without
            issuing a request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body on HTTP 200, otherwise an empty dict (non-200
        status, network error, or a body that is not valid JSON).
    """
    if not url:
        # Callers pass whatever the search result contained; an empty URL
        # would make ``requests`` raise instead of returning a response.
        return {}

    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve master data: {response.status_code}")
            return {}
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Failed to retrieve master data: {e}")
        return {}

### Download the cover image

In [8]:
def download_image(url, folder, timeout=10):
    """Download a cover image into ``folder``.

    The file name is the last path segment of ``url`` truncated at its first
    ``.``; the bytes are written to ``<folder>/<name>.jpg``. A browser-like
    ``User-Agent`` header is sent with the request.

    Args:
        url: Image URL to fetch.
        folder: Existing directory in which the image is stored.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``<folder>/<name>`` (without the ``.jpg`` suffix the file on disk
        has) on success, ``''`` on a non-200 status or on any exception.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # The context manager releases the streamed connection on every exit
        # path; the timeout keeps a stalled server from hanging the notebook.
        with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download image: {url}, Status code: {response.status_code}")
                return ''

            img_name = os.path.basename(url.split("/")[-1].split('.')[0])
            img_path = os.path.join(folder, img_name)

            with open(img_path + ".jpg", 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)

            return img_path
    except Exception as e:
        # Deliberately broad: image download is best effort and must never
        # abort the surrounding scrape loop.
        print(f"Error downloading image from URL: {url}, Error: {e}")
        return ''

### Write data into a JSON file

In [9]:
def extract_cd_info(cd, img_folder):
    """Flatten one Discogs search result into the record stored in ``cds.json``.

    Track list, videos and artist names are not part of the search result, so
    they are read from a detail document: ``master_url`` is tried first and
    ``resource_url`` is used when the master is absent or could not be
    fetched. Previously ``resource_url`` was only consulted when a
    ``master_url`` was present, so results without a master never got a
    track list. Note that this issues one or two extra HTTP requests per CD.

    Args:
        cd: One element of the ``results`` list of a Discogs search.
        img_folder: Directory handed to ``download_image`` for the cover.

    Returns:
        A dict with the keys ``name``, ``image_path``, ``genre``,
        ``collections``, ``albums``, ``artist``, ``videos``, ``tracklist``,
        ``label``, ``year`` and ``country``. The three detail lists are empty
        when no detail document could be retrieved.
    """
    master_url = cd.get('master_url', '')
    resources_url = cd.get('resource_url', '')

    # First non-empty detail document wins; empty/None URLs are skipped so no
    # request is attempted for them.
    master_data = {}
    for detail_url in (master_url, resources_url):
        if not detail_url:
            continue
        master_data = get_discogs_master_data(detail_url) or {}
        if master_data:
            break

    tracklist = [
        {"position": track.get("position", ""), "title": track.get("title", ""), "duration": track.get("duration", "")}
        for track in master_data.get('tracklist', [])
    ]
    videos = [
        {"uri": video.get("uri", ""), "title": video.get("title", ""), "description": video.get("description", ""), "duration": video.get("duration", "")}
        for video in master_data.get('videos', [])
    ]
    artists = [artist.get('name', '') for artist in master_data.get('artists', [])]

    cover_image_url = cd.get('cover_image', '')
    cover_image_path = download_image(cover_image_url, img_folder) if cover_image_url else ''

    return {
        'name': cd.get('title', 'No title'),
        'image_path': cover_image_path,
        'genre': cd.get('genre', ['Unknown genre']),

        'collections': cd.get('type', 'Unknown collections'),
        'albums': cd.get('style', ['Unknown albums']),
        'artist': artists,
        'videos': videos,
        'tracklist': tracklist,
        'label': cd.get('label', ['Unknown label']),
        'year': cd.get('year', 'Unknown year'),
        'country': cd.get('country', 'Unknown country'),
    }

def save_to_json(data, filename):
    """Dump ``data`` into ``filename`` as JSON, using a 4-space indent.

    Any previous content of ``filename`` is overwritten.
    """
    with open(filename, mode='w') as json_file:
        json.dump(obj=data, fp=json_file, indent=4)

### Send request and get data

In [None]:
# The Discogs personal access token is read from the environment so that it
# is never stored in (or committed with) the notebook. Any token that was
# previously pasted into this cell should be treated as leaked and revoked.
discogs_token = os.environ.get('DISCOGS_TOKEN')
if not discogs_token:
    raise RuntimeError("Set the DISCOGS_TOKEN environment variable before running this cell")

# One search is issued per keyword.
queries = ["Christian", "Classical", "Country", "Jazz", "Misc", "Rap", "Rock", "Soul", "Soundtrack", "Standard",
           "World", "Punk", "Blues", "Opera", "Symphony", "Garage", "EDM", "Beat", "House", "Electro", "Hop", "Pop"]
all_cds = []
img_folder = "cds"

# Create the images folder if it doesn't exist
os.makedirs(img_folder, exist_ok=True)

# Fetch up to 20 CDs per keyword and flatten each into a record.
for query in queries:
    cds = get_discogs_cd_data(discogs_token, query, 20)
    for cd in cds:
        cd_info = extract_cd_info(cd, img_folder)
        all_cds.append(cd_info)

save_to_json(all_cds, 'cds.json')
print("Data saved to cds.json")

# Get DVDs from the OMDb API

In [49]:
def fetch_movie_data(api_key, movie_title, timeout=10):
    """Look up a single title in the OMDb API.

    Args:
        api_key: OMDb key sent as the ``apikey`` query parameter.
        movie_title: Title sent as the ``t`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON document for the title, or an empty dict when the
        request fails, the body is not valid JSON, or the document's
        ``Response`` field is the string ``'False'`` (in which case its
        ``Error`` message is printed).
    """
    # HTTPS keeps the API key, which travels in the query string, off the
    # wire in clear text.
    base_url = 'https://www.omdbapi.com/'
    params = {'apikey': api_key, 't': movie_title}

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)
        movie_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Request error: {e}")
        return {}

    # ``.get`` avoids a KeyError if either field is missing from the document.
    if movie_data.get('Response') == 'False':
        print(f"Error: {movie_data.get('Error', 'Unknown error')}")
        return {}

    return movie_data
    

### Download the poster

In [50]:
def download_image(url, folder, timeout=10):
    """Download a poster image into ``folder``.

    The file name is the last path segment of ``url`` truncated at its first
    ``.``; the bytes are written to ``<folder>/<name>.jpg``. A browser-like
    ``User-Agent`` header is sent with the request.

    Args:
        url: Image URL to fetch.
        folder: Existing directory in which the image is stored.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``<folder>/<name>`` (without the ``.jpg`` suffix the file on disk
        has) on success, ``''`` on a non-200 status or on any exception.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        # The context manager closes the streamed response on every exit
        # path; the timeout keeps a stalled server from hanging the notebook.
        with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download image: {url}, Status code: {response.status_code}")
                return ''

            img_name = os.path.basename(url.split("/")[-1].split('.')[0])
            img_path = os.path.join(folder, img_name)

            with open(img_path + ".jpg", 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)

            return img_path
    except Exception as e:
        # Deliberately broad: poster download is best effort and must never
        # abort the surrounding scrape loop (this also covers malformed URLs).
        print(f"Error downloading image from URL: {url}, Error: {e}")
        return ''

In [54]:
def extract_dvd_info(dvd, folder):
    """Flatten one OMDb title document into the record stored in the DVD JSON.

    Comma-separated fields (``Genre``, ``Director``, ``Writer``, ``Actors``)
    are split on ``', '`` into lists. The poster, when present, is downloaded
    with ``download_image`` and its returned path stored as ``image_path``.

    Args:
        dvd: Document returned by ``fetch_movie_data``; may be empty.
        folder: Directory handed to ``download_image`` for the poster.

    Returns:
        ``{}`` when ``dvd`` is empty, otherwise a dict with the keys ``name``,
        ``description``, ``genre``, ``avgRating``, ``ratingsCount``,
        ``image_path``, ``director``, ``runtime``, ``writer``, ``actors``,
        ``language``, ``releasedDate``, ``year`` and ``country``.
    """
    if not dvd:
        return {}

    poster_url = dvd.get('Poster', '')
    # OMDb reports a missing poster as the literal string "N/A" (see the OMDb
    # API docs); skip the download instead of requesting an invalid URL.
    has_poster = bool(poster_url) and poster_url != 'N/A'
    poster_path = download_image(poster_url, folder) if has_poster else ''

    # The list defaults carry no trailing ", " so that splitting them yields
    # a single placeholder rather than a placeholder plus an empty string.
    return {
        'name': dvd.get('Title', 'No title'),
        'description': dvd.get('Plot', 'No plot'),
        'genre': dvd.get('Genre', 'Unknown genre').split(', '),
        'avgRating': dvd.get('imdbRating', '0'),
        'ratingsCount': dvd.get('imdbVotes', '0'),
        'image_path': poster_path,

        'director': dvd.get('Director', 'Unknown director').split(', '),
        'runtime': dvd.get('Runtime', 'Unknown duration'),
        'writer': dvd.get('Writer', 'Unknown writer').split(', '),
        'actors': dvd.get('Actors', 'Unknown actors').split(', '),
        'language': dvd.get('Language', 'Unknown language'),
        'releasedDate': dvd.get('Released', 'Unknown date'),
        'year': dvd.get('Year', 'Unknown year'),
        'country': dvd.get('Country', 'Unknown country'),
    }

In [55]:
# The OMDb API key is read from the environment so that it is never stored in
# (or committed with) the notebook. Any key that was previously pasted into
# this cell should be treated as leaked and replaced.
api_key = os.environ.get('OMDB_API_KEY')
if not api_key:
    raise RuntimeError("Set the OMDB_API_KEY environment variable before running this cell")

# NOTE: `keywords` is not used by this cell; the titles that are actually
# looked up come from list.json below.
keywords = [
    "man", "star", "movie", "home", "wars", "day", "last", "night", "spider", "war",
    "death", "la", "men", "big", "black", "book", "christmas", "dark", "city", "family",
    "halloween", "perfect", "secret", "summer", "true", "white", "world", "american", "bad", "story",
    "blood", "blue", "dog", "dragon", "evil", "final", "good", "green", "happy", "harry",
    "fallen", "fantasy", "fast", "avengers", "beauty", "boss", "brave", "captain", "crazy", "deadpool",
    "avatar", "avatar 2", "avatar 3", "avatar 4", "avatar 5", "age of ultron", "endgame", "infinity war",
    "tv", "love", "life", "time", "murder", "teenager", "virus", "zombie", "kiss"
]

# Titles to look up. The variable is no longer called `list`, which shadowed
# the builtin for every cell executed afterwards.
with open('list.json', 'r', encoding='utf-8') as f:
    movie_titles = json.load(f)

all_dvds = []
img_folder = "dvds"

# Create the images folder if it doesn't exist
os.makedirs(img_folder, exist_ok=True)

# Titles that OMDb cannot resolve produce an empty record and are skipped.
for title in movie_titles:
    dvd = fetch_movie_data(api_key, title)
    dvd_info = extract_dvd_info(dvd, img_folder)
    if len(dvd_info) != 0:
        all_dvds.append(dvd_info)


save_to_json(all_dvds, 'new_dvds.json')

Error: Movie not found!
Error: Movie not found!
Error: Movie not found!
Error: Movie not found!
Error: Movie not found!
