In [1]:
pip install instaloader


Note: you may need to restart the kernel to use updated packages.


In [3]:
import instaloader
import os
import pandas as pd
from instaloader.exceptions import ConnectionException

def extract_metadata(username, output_directory, max_posts=5300):
    """Download a profile's posts and save their metadata to a CSV file.

    Every post taken from the profile is downloaded with Instaloader using
    ``output_directory`` as the download target. For each post that downloads
    successfully, one row (URL, caption, likes, comments, UTC timestamp) is
    collected. The rows are written to
    ``<output_directory>/<profile username>_Final.csv``.

    Args:
        username: Instagram username whose posts are processed.
        output_directory: Download target handed to Instaloader and the
            directory that receives the CSV file. It is created if it does
            not exist by the time the CSV is written.
        max_posts: Maximum number of posts taken from the profile. Posts
            whose download fails still count towards this limit.

    Returns:
        None. Progress and errors are reported with ``print``; a missing
        profile is reported instead of raised.
    """
    # Create an instance of Instaloader class
    loader = instaloader.Instaloader()

    try:
        # Retrieve the profile of the provided username
        profile = instaloader.Profile.from_username(loader.context, username)

        # One dict per successfully downloaded post
        metadata_list = []

        for index, post in enumerate(profile.get_posts()):
            # Stop once max_posts posts have been taken from the feed
            if index >= max_posts:
                break

            try:
                # Download the media (images and videos) associated with the post
                loader.download_post(post, target=output_directory)

                # NOTE(review): Instaloader documents `Post.url` as the URL of
                # the picture / video thumbnail, not the post permalink --
                # confirm that this is what the "Post URL" column should hold.
                metadata_list.append({
                    "Post URL": post.url,
                    "Caption": post.caption,
                    "Likes": post.likes,
                    "Comments": post.comments,
                    "Timestamp": post.date_utc
                })
            except ConnectionException as e:
                # A failed download only skips this post; the run continues.
                print(f"Error downloading post {post.url}: {e}")
                continue

        # Convert metadata list to DataFrame
        metadata_df = pd.DataFrame(metadata_list)

        # The directory normally appears as a side effect of the first
        # successful download. When nothing was downloaded it may not exist
        # yet, so create it explicitly before writing the CSV.
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)

        # Save metadata to CSV file
        metadata_csv_path = os.path.join(output_directory, f"{profile.username}_Final.csv")
        metadata_df.to_csv(metadata_csv_path, index=False)

        print("Metadata extraction completed. CSV file saved at:", metadata_csv_path)

    except instaloader.exceptions.ProfileNotExistsException:
        print("Error: Profile does not exist.")

if __name__ == "__main__":
    # Profile to scrape, and the folder that receives its media and CSV.
    username = "officialjiocinema"
    output_directory = r"jio"

    extract_metadata(username=username, output_directory=output_directory)


jio\2024-02-02_04-16-13_UTC.jpg [The stars are aligning on the…] json 
jio\2024-02-19_07-30-02_UTC.jpg [The gates to Night Country ar…] json 
jio\2024-02-13_05-47-50_UTC.jpg [𝑻𝒉𝒆 𝑻𝒊𝒎𝒆 𝑰𝒔 𝑵𝒐𝒘! 🔥  Get ready…] jio\2024-02-13_05-47-50_UTC.mp4 json 
jio\2024-02-21_16-25-09_UTC.jpg [@mirshad_michu32 was on 🔝 of …] jio\2024-02-21_16-25-09_UTC.mp4 json 
jio\2024-02-21_16-05-46_UTC.jpg [The #Highlanders solidify the…] jio\2024-02-21_16-05-46_UTC.mp4 json 
jio\2024-02-21_16-03-31_UTC.jpg [Ice-🆒 from the penalty spot! …] jio\2024-02-21_16-03-31_UTC.mp4 json 
jio\2024-02-21_15-57-42_UTC.jpg [The #Highlanders register an …] json 
jio\2024-02-21_15-52-38_UTC.jpg [Absolute scenes in #Goa as @n…] jio\2024-02-21_15-52-38_UTC.mp4 json 
jio\2024-02-21_15-19-41_UTC.jpg [Will Yastikaa pick 𝐬𝐰𝐞𝐞𝐭 𝐨𝐫 𝐝…] jio\2024-02-21_15-19-41_UTC.mp4 json 
jio\2024-02-21_14-58-39_UTC.jpg [4 Days before we showcase our…] jio\2024-02-21_14-58-39_UTC.mp4 json 
jio\2024-02-21_14-49-39_UTC.jpg [A goalless first-half comes t…] j

In [4]:
import os
import shutil

def filter_and_copy_files(input_directory, output_directory):
    """Copy the JPG and MP4 files of a folder into ``Images`` / ``Videos``.

    Only the direct entries of ``input_directory`` are examined. Names ending
    in ``.jpg`` are copied to ``<output_directory>/Images`` and names ending
    in ``.mp4`` to ``<output_directory>/Videos``; every other entry is
    ignored. The suffix match is case-sensitive.

    Args:
        input_directory: Folder holding the downloaded files.
        output_directory: Root folder for the sorted copies; it and both
            subfolders are created when missing.
    """
    os.makedirs(output_directory, exist_ok=True)

    # Filename suffix -> subfolder that receives matching files.
    destinations = {
        '.jpg': os.path.join(output_directory, 'Images'),
        '.mp4': os.path.join(output_directory, 'Videos'),
    }
    for folder in destinations.values():
        os.makedirs(folder, exist_ok=True)

    for entry in os.listdir(input_directory):
        # First (and only possible) suffix that matches this entry, if any.
        target_folder = next(
            (folder for suffix, folder in destinations.items() if entry.endswith(suffix)),
            None,
        )
        if target_folder is None:
            continue
        shutil.copy(
            os.path.join(input_directory, entry),
            os.path.join(target_folder, entry),
        )

    print("Filtered files copied to:", output_directory)

if __name__ == "__main__":
    # Source: the folder holding the downloaded files.
    input_directory = r"jio"
    # Destination root for the Images/ and Videos/ copies.
    output_directory = r"E:\NYX\Jio Cinema"

    filter_and_copy_files(
        input_directory=input_directory,
        output_directory=output_directory,
    )


Filtered files copied to: E:\NYX\Jio Cinema


In [5]:
!pip install moviepy



In [None]:
import os
import csv
from moviepy.editor import VideoFileClip

def get_video_duration(file_path):
    """Return the duration of a video file, or None if it cannot be read.

    Args:
        file_path: Path of the video file to open with ``VideoFileClip``.

    Returns:
        The clip's ``duration`` attribute (written by the caller under the
        "Duration (s)" column), or ``None`` when opening or reading the clip
        raises. The error is printed rather than propagated.
    """
    try:
        clip = VideoFileClip(file_path)
        try:
            return clip.duration
        finally:
            # Always release the clip once it has been opened, even when
            # reading the duration fails.
            clip.close()
    except Exception as e:
        print(f"Error extracting duration for {file_path}: {e}")
        return None

def generate_csv(folder_path, csv_filename):
    """Write an inventory of every file under a folder to a CSV file.

    The folder is walked recursively. Each file produces one row holding its
    name, its extension (the text after the last dot; empty when the name
    contains no dot) and, for ``mp4``/``avi``/``mkv``/``mov`` files, the
    duration reported by ``get_video_duration``.

    Args:
        folder_path: Root folder to scan, including its subfolders.
        csv_filename: Path of the CSV file to create or overwrite.
    """
    video_extensions = ('mp4', 'avi', 'mkv', 'mov')

    # An explicit UTF-8 encoding keeps non-ASCII file names writable on
    # platforms whose default text encoding is narrower (e.g. Windows).
    with open(csv_filename, 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.writer(csv_file)

        # Write header to the CSV file
        csv_writer.writerow(['File Name', 'File Type', 'Duration (s)'])

        # Traverse through the folder and its subfolders
        for root, dirs, files in os.walk(folder_path):
            for file_name in files:
                file_path = os.path.join(root, file_name)

                # Text after the last dot; a name without any dot has no
                # extension rather than being its own "type".
                _, dot, suffix = file_name.rpartition('.')
                file_type = suffix if dot else ''

                # Only video files get a duration. None is written by the
                # csv module as an empty field.
                if file_type.lower() in video_extensions:
                    duration = get_video_duration(file_path)
                else:
                    duration = None

                csv_writer.writerow([file_name, file_type, duration])

    print(f"CSV file '{csv_filename}' generated successfully.")

# Folder to inventory and the CSV file that receives the listing.
folder_path = r"E:\NYX\Jio Cinema"
# NOTE(review): "jicinema" looks like a typo for "jiocinema" -- confirm
# before renaming, since other tooling may already expect this file name.
csv_filename = 'jicinema_metadata.csv'

# Build the CSV inventory for the folder above.
generate_csv(folder_path=folder_path, csv_filename=csv_filename)
