In [1]:
! pip install pymongo
! pip install dotenv




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import requests
from pymongo import MongoClient
import logging

#configure logging
logging.basicConfig(level=logging.INFO)
#create a logger
logger = logging.getLogger(__name__)

import os
from dotenv import load_dotenv

load_dotenv('.env')

MONGO_URI = os.getenv("MONGO_URI")
logger.info(f"MongoDB URI: {MONGO_URI}")
# 1. API Endpoint (get real data from a public API)
API_URL = "https://jsonplaceholder.typicode.com/posts"

# 2. MongoDB Config (Update as needed)
DATABASE_NAME = "mydatabase"
COLLECTION_NAME = "posts"

def fetch_data_from_api(url):
    """Fetch data from the given API endpoint."""
    try:
        response = requests.get(url)
        response.raise_for_status()
        #print(f"✅ Data fetched successfully from API")
        logger.info("Data fetched successfully from API")
        return response.json()
    except requests.RequestException as e:
        #print(f"❌ Error fetching data: {e}")
        logger.error(f"Error fetching data: {e}")
        return []

def load_data_to_mongodb(data, db_name, collection_name):
    """Insert data into MongoDB collection.
    if data already exists, it will not insert duplicates based on '_id' field."""
    try:
        client = MongoClient(MONGO_URI)
        db = client[db_name]
        collection = db[collection_name]

        if data:
            # Insert data, ignoring duplicates based on '_id'
            for record in data:
                try:
                    collection.insert_one(record)
                except Exception as e:
                    logger.warning(f"Duplicate or insertion error for record {record.get('_id')}: {e}")
            print(f"✅ Data loaded into MongoDB collection '{collection_name}'")
            logger.info(f"Data loaded into MongoDB collection '{collection_name}'")
        else:
            print("⚠️ No data to load into MongoDB.")
            logger.warning("No data to load into MongoDB.")

    except Exception as e:
        print(f"❌ MongoDB Error: {e}")
        logger.error(f"MongoDB Error: {e}")

### write a function that   READ data from MongoDB to verify insertion and create a dataframe from it
def read_data_from_mongodb(db_name, collection_name):
    """Read data from MongoDB collection and return as a list."""
    try:
        client = MongoClient(MONGO_URI)
        db = client[db_name]
        collection = db[collection_name]

        data = list(collection.find())
        print(f"✅ Retrieved {len(data)} documents from '{collection_name}' collection.")
        logger.info(f"Retrieved {len(data)} documents from '{collection_name}' collection.")
        return data

    except Exception as e:
        print(f"❌ MongoDB Error: {e}")
        logger.error(f"MongoDB Error: {e}")
        return []

##  create a dataframe from the data
import pandas as pd
def create_dataframe(data):
    """Create a pandas DataFrame from the given data."""
    if data:
        df = pd.DataFrame(data)
        #print(f"✅ DataFrame created with {len(df)} records.")
        logger.info(f"DataFrame created with {len(df)} records.")
        #print(df.head())  # Display the first few records
        return df
    else:
        #print("⚠️ No data available to create DataFrame.")
        logger.warning("No data available to create DataFrame.")
        return pd.DataFrame()
    
#create csv file from dataframe
def save_dataframe_to_csv(df, filename):
    """Save the given DataFrame to a CSV file."""
    if not df.empty:
        df.to_csv(filename, index=False)
        print(f"✅ DataFrame saved to '{filename}'.")
        logger.info(f"DataFrame saved to '{filename}'.")
    else:
        print("⚠️ DataFrame is empty. No CSV file created.")
        logger.warning("DataFrame is empty. No CSV file created.")


if __name__ == "__main__":
    logger.info("Starting the data pipeline...")
    api_data = fetch_data_from_api(API_URL)
    logger.info(f"Fetched {len(api_data)} records from API.")
    logger.info("Loading data into MongoDB...")
    load_data_to_mongodb(api_data, DATABASE_NAME, COLLECTION_NAME)
    logger.info("Reading data back from MongoDB...")
    read_data_from_mongodb(DATABASE_NAME, COLLECTION_NAME)
    logger.info("Creating DataFrame from MongoDB data...")
    create_dataframe(api_data)
    save_dataframe_to_csv(create_dataframe(api_data), "output.csv")
    logger.info("Data pipeline completed successfully.")
    #print("✅ Data pipeline completed successfully.")
    

INFO:__main__:MongoDB URI: mongodb+srv://bhalekarganesh40_db_user:xr4KD20taBcpwPC8@mymongo.xhqjajg.mongodb.net/
INFO:__main__:Starting the data pipeline...


INFO:__main__:Data fetched successfully from API
INFO:__main__:Fetched 100 records from API.
INFO:__main__:Loading data into MongoDB...
INFO:__main__:Data loaded into MongoDB collection 'posts'
INFO:__main__:Reading data back from MongoDB...


✅ Data loaded into MongoDB collection 'posts'


INFO:__main__:Retrieved 300 documents from 'posts' collection.
INFO:__main__:Creating DataFrame from MongoDB data...
INFO:__main__:DataFrame created with 100 records.
INFO:__main__:DataFrame created with 100 records.
INFO:__main__:DataFrame saved to 'output.csv'.
INFO:__main__:Data pipeline completed successfully.


✅ Retrieved 300 documents from 'posts' collection.
✅ DataFrame saved to 'output.csv'.


In [5]:

def dbconnection():
    try:
        client = MongoClient(MONGO_URI)
        db = client['sample_mflix']
        collection = db['movies']

        data = list(collection.find())
        print("✅ Connected to MongoDB 'movies' database.")
        logger.info("Connected to MongoDB 'sample_mflix' database.")
        return collection
    except Exception as e:
        print(f"❌ MongoDB Connection Error: {e}")
        logger.error(f"MongoDB Connection Error: {e}")
        return None
    
# fetch all documents from movies collection
def fetch_all_movies(collection):
    try:
        movies = list(collection.find())
        print(f"✅ Retrieved {len(movies)} movies from 'movies' collection.")
        logger.info(f"Retrieved {len(movies)} movies from 'movies' collection.")
        return movies
    except Exception as e:
        print(f"❌ Error fetching movies: {e}")
        logger.error(f"Error fetching movies: {e}")
        return []

# create a dataframe from movies collection
def movies_dataframe(movies):
    if movies:
        df = pd.DataFrame(movies)
        print(f"✅ Movies DataFrame created with {len(df)} records.")
        logger.info(f"Movies DataFrame created with {len(df)} records.")
        print(df.head())  # Display the first few records
        return df
    else:
        print("⚠️ No movies data available to create DataFrame.")
        logger.warning("No movies data available to create DataFrame.")
        return pd.DataFrame()
    
if __name__ == "__main__":
    ''' established mongo connection 
    fetched the all movies from movies collection
    and create df
    '''
    collection = dbconnection()
    movies = fetch_all_movies(collection)
    movies_df = movies_dataframe(movies)





INFO:__main__:Connected to MongoDB 'sample_mflix' database.


✅ Connected to MongoDB 'movies' database.


INFO:__main__:Retrieved 21349 movies from 'movies' collection.
INFO:__main__:Movies DataFrame created with 21349 records.


✅ Retrieved 21349 movies from 'movies' collection.
✅ Movies DataFrame created with 21349 records.
                        _id  \
0  573a1390f29313caabcd42e8   
1  573a1390f29313caabcd446f   
2  573a1390f29313caabcd4803   
3  573a1390f29313caabcd4eaf   
4  573a1390f29313caabcd50e5   

                                                plot  \
0  A group of bandits stage a brazen train hold-u...   
1  A greedy tycoon decides, on a whim, to corner ...   
2  Cartoon figures announce, via comic strip ball...   
3  A woman, with the aid of her police officer sw...   
4  The cartoonist, Winsor McCay, brings the Dinos...   

                       genres  runtime  \
0            [Short, Western]     11.0   
1              [Short, Drama]     14.0   
2  [Animation, Short, Comedy]      7.0   
3              [Crime, Drama]     88.0   
4  [Animation, Short, Comedy]     12.0   

                                                cast  \
0  [A.C. Abadie, Gilbert M. 'Broncho Billy' Ander...   
1  [Frank Pow

In [39]:
# data analysis for movies df
movies_df.head()

Unnamed: 0,_id,plot,genres,runtime,cast,poster,title,fullplot,languages,released,...,awards,lastupdated,year,imdb,countries,type,tomatoes,num_mflix_comments,writers,metacritic
0,573a1390f29313caabcd42e8,A group of bandits stage a brazen train hold-u...,"[Short, Western]",11.0,"[A.C. Abadie, Gilbert M. 'Broncho Billy' Ander...",https://m.media-amazon.com/images/M/MV5BMTU3Nj...,The Great Train Robbery,Among the earliest existing films in American ...,[English],1903-12-01,...,"{'wins': 1, 'nominations': 0, 'text': '1 win.'}",2015-08-13 00:27:59.177000000,1903,"{'rating': 7.4, 'votes': 9847, 'id': 439}",[USA],movie,"{'viewer': {'rating': 3.7, 'numReviews': 2559,...",0,,
1,573a1390f29313caabcd446f,"A greedy tycoon decides, on a whim, to corner ...","[Short, Drama]",14.0,"[Frank Powell, Grace Henderson, James Kirkwood...",,A Corner in Wheat,"A greedy tycoon decides, on a whim, to corner ...",[English],1909-12-13,...,"{'wins': 1, 'nominations': 0, 'text': '1 win.'}",2015-08-13 00:46:30.660000000,1909,"{'rating': 6.6, 'votes': 1375, 'id': 832}",[USA],movie,"{'viewer': {'rating': 3.6, 'numReviews': 109, ...",1,,
2,573a1390f29313caabcd4803,"Cartoon figures announce, via comic strip ball...","[Animation, Short, Comedy]",7.0,[Winsor McCay],https://m.media-amazon.com/images/M/MV5BYzg2Nj...,"Winsor McCay, the Famous Cartoonist of the N.Y...",Cartoonist Winsor McCay agrees to create a lar...,[English],1911-04-08,...,"{'wins': 1, 'nominations': 0, 'text': '1 win.'}",2015-08-29 01:09:03.030000000,1911,"{'rating': 7.3, 'votes': 1034, 'id': 1737}",[USA],movie,"{'viewer': {'rating': 3.4, 'numReviews': 89, '...",0,"[Winsor McCay (comic strip ""Little Nemo in Slu...",
3,573a1390f29313caabcd4eaf,"A woman, with the aid of her police officer sw...","[Crime, Drama]",88.0,"[Jane Gail, Ethel Grandin, William H. Turner, ...",https://m.media-amazon.com/images/M/MV5BYzk0YW...,Traffic in Souls,,[English],1913-11-24,...,"{'wins': 1, 'nominations': 0, 'text': '1 win.'}",2015-09-15 02:07:14.247000000,1913,"{'rating': 6.0, 'votes': 371, 'id': 3471}",[USA],movie,"{'viewer': {'rating': 3.0, 'numReviews': 85, '...",1,,
4,573a1390f29313caabcd50e5,"The cartoonist, Winsor McCay, brings the Dinos...","[Animation, Short, Comedy]",12.0,"[Winsor McCay, George McManus, Roy L. McCardell]",https://m.media-amazon.com/images/M/MV5BMTQxNz...,Gertie the Dinosaur,Winsor Z. McCay bets another cartoonist that h...,[English],1914-09-15,...,"{'wins': 1, 'nominations': 0, 'text': '1 win.'}",2015-08-18 01:03:15.313000000,1914,"{'rating': 7.3, 'votes': 1837, 'id': 4008}",[USA],movie,"{'viewer': {'rating': 3.7, 'numReviews': 29}, ...",0,[Winsor McCay],
