## Setup Cassandra

In [2]:
from cassandra.cluster import Cluster
from cassandra.policies import RetryPolicy
import time
import sys

def wait_for_cassandra(max_retries=30, retry_delay=2):
    """Poll the local Cassandra node until a connection succeeds.

    Every attempt builds a fresh ``Cluster`` for ``localhost:9042`` and tries
    to connect. The cluster object is shut down after each attempt, whether
    it succeeded or failed, so repeated retries do not rely on the driver to
    clean up after itself.

    Args:
        max_retries: Maximum number of connection attempts.
        retry_delay: Seconds to sleep between two failed attempts. No sleep
            happens after the final attempt.

    Returns:
        True as soon as one attempt connects, False if every attempt fails.
    """
    for attempt in range(1, max_retries + 1):
        cluster = None
        try:
            # Construction stays inside the try block so that an error raised
            # while building the Cluster is retried like a connection error.
            cluster = Cluster(['localhost'], port=9042)
            cluster.connect()
            print("Cassandra is ready!")
            return True
        except Exception:
            # Any failure counts as "not ready yet"; the readiness probe is
            # deliberately best-effort.
            print(f"Waiting for Cassandra... ({attempt}/{max_retries})")
        finally:
            if cluster is not None:
                cluster.shutdown()

        # Only wait when another attempt will follow.
        if attempt < max_retries:
            time.sleep(retry_delay)
    return False

## Create Keyspace and Table Schema

In [3]:
def initialize_schema():
    """Create the ``movie_ratings`` keyspace and (re)create its ``ratings`` table.

    Steps, in order:
      1. Create the keyspace if it does not exist (SimpleStrategy, RF=1).
      2. Drop ``movie_ratings.ratings`` if present. This is destructive: any
         existing rows in that table are lost.
      3. Create the ``ratings`` table.
      4. Print the table's column names read back from ``system_schema``.

    The cluster connection is always shut down, even when one of the
    statements raises.

    Raises:
        Exception: Any driver error from connecting or from the keyspace /
            table creation and verification statements is propagated. Errors
            from the DROP statement are reported and otherwise ignored.
    """
    cluster = Cluster(['localhost'], port=9042)
    try:
        session = cluster.connect()

        # Create keyspace
        print("Creating keyspace 'movie_ratings'...")
        session.execute("""
        CREATE KEYSPACE IF NOT EXISTS movie_ratings
        WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}
    """)
        print("Keyspace created!!")

        print("Dropping existing table if present...")
        try:
            session.execute("DROP TABLE IF EXISTS movie_ratings.ratings")
            print("Old table dropped")
        except Exception as e:
            # IF EXISTS already makes a missing table a no-op, so reaching
            # this branch means the drop itself failed. Stay best-effort but
            # report it accurately.
            print(f"Could not drop existing table: {e}")

        print("Creating table 'ratings'...")
        # Column names are double-quoted so their mixed case is preserved.
        session.execute("""
        CREATE TABLE movie_ratings.ratings (
            "UserId" int,
            "MovieId" int,
            "Rating" double,
            "Timestamp" bigint,
            "kafka_timestamp" timestamp,
            PRIMARY KEY ("UserId", "MovieId", "Timestamp")
        )
    """)
        print("Table created")

        # Verify the schema
        print("\nVerifying column names...")
        session.set_keyspace('movie_ratings')

        rows = session.execute("""
        SELECT column_name FROM system_schema.columns 
        WHERE keyspace_name='movie_ratings' AND table_name='ratings'
    """)

        print("Cassandra columns:")
        for row in rows:
            print(f"  - {row.column_name}")
    finally:
        # Release driver resources on both the success and the failure path.
        cluster.shutdown()

    print("\nCassandra initialization complete!")

## Run Cassandra Initialization

In [5]:
if __name__ == "__main__":
    print("Cassandra Initialization")

    # Build the schema only once the node accepts connections; otherwise
    # report the failure and stop with a non-zero exit status.
    cassandra_ready = wait_for_cassandra()
    if cassandra_ready:
        initialize_schema()
    else:
        print("Failed to connect to Cassandra")
        sys.exit(1)

Cassandra Initialization
Cassandra is ready!
Creating keyspace 'movie_ratings'...
Keyspace created!!
Dropping existing table if present...
Old table dropped
Creating table 'ratings'...
Table created

Verifying column names...
Cassandra columns:
  - MovieId
  - Rating
  - Timestamp
  - UserId
  - kafka_timestamp

Cassandra initialization complete!
