In [None]:
############ Cassandra

In [1]:
!pip install cassandra-driver pandas



In [9]:
from cassandra.cluster import Cluster
import pandas as pd


In [17]:

# Connect to the local Cassandra node on port 9042 and open a session
# whose working keyspace is "movies".
cluster = Cluster(contact_points=['127.0.0.1'], port=9042)
session = cluster.connect("movies")


In [18]:
import pandas as pd

# Read the ratings file from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer="ratings.csv")


In [19]:
# CQL used to write one rating; the four "?" placeholders are bound
# with concrete values each time the prepared statement is executed.
insert_query = (
    "\n"
    "INSERT INTO ratings (user_id, movie_id, rating, rating_timestamp)\n"
    "VALUES (?, ?, ?, ?)\n"
)

# Prepare the statement once on the session so it can be reused for every row.
prepared = session.prepare(insert_query)


In [21]:
from datetime import datetime, timezone

from cassandra.query import BatchStatement

# Number of INSERTs grouped into a single batch before it is sent to Cassandra.
BATCH_SIZE = 100

batch = BatchStatement()
count = 0

# Walk the DataFrame row by row and queue one bound INSERT per rating.
# itertuples() is faster than iterrows() and keeps each column's own dtype.
for row in df.itertuples(index=False):
    batch.add(prepared, (
        int(row.userId),
        int(row.movieId),
        float(row.rating),
        # The CSV value is passed as Unix epoch seconds; build a UTC datetime
        # from it for the rating_timestamp column.
        datetime.fromtimestamp(int(row.timestamp), tz=timezone.utc),
    ))
    count += 1
    # Send the batch once it is full, then empty it for the next rows.
    if count % BATCH_SIZE == 0:
        session.execute(batch)
        batch.clear()

# Flush the remaining rows when the total is not a multiple of BATCH_SIZE.
if count % BATCH_SIZE:
    session.execute(batch)


In [22]:
# Spot-check the load: fetch at most five rows and print each of them.
sample_cql = "SELECT * FROM ratings LIMIT 5"
rows = session.execute(sample_cql)
for r in rows:
    print(r)

Row(user_id=23, movie_id=6, rating=4.0, rating_timestamp=datetime.datetime(2005, 2, 2, 11, 4, 27))
Row(user_id=23, movie_id=29, rating=4.0, rating_timestamp=datetime.datetime(2005, 2, 2, 10, 52, 54))
Row(user_id=23, movie_id=32, rating=3.5, rating_timestamp=datetime.datetime(2005, 2, 2, 10, 55, 50))
Row(user_id=23, movie_id=50, rating=4.0, rating_timestamp=datetime.datetime(2005, 1, 31, 9, 29, 1))
Row(user_id=23, movie_id=58, rating=3.0, rating_timestamp=datetime.datetime(2005, 1, 31, 9, 36, 23))


In [23]:
# Shut down the session first, then the cluster object it was created from.
session.shutdown()
cluster.shutdown()


In [24]:
# Step 7 - total number of records
# Open a fresh connection, count the rows of `ratings`, and always release the
# connection afterwards, even when connecting or the query itself fails.
cluster = Cluster(['127.0.0.1'], port=9042)
try:
    session = cluster.connect("movies")
    count = session.execute("SELECT COUNT(*) FROM ratings")
    # COUNT(*) yields a single row; .one() returns it without building a list.
    print(f"Total enregistrements : {count.one().count}")
finally:
    # Shutting down the cluster also closes the sessions created from it.
    cluster.shutdown()


Total enregistrements : 100839


In [1]:
########## Neo4j

In [3]:
pip install neo4j


Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Downloading neo4j-5.28.1-py3-none-any.whl (312 kB)
Installing collected packages: neo4j
Successfully installed neo4j-5.28.1
Note: you may need to restart the kernel to use updated packages.


In [8]:
import csv
from neo4j import GraphDatabase


In [9]:
import os
from getpass import getpass

# Connection settings for the Neo4j instance. Each value can be overridden
# through an environment variable; URI and USER fall back to the local defaults.
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
USER = os.environ.get("NEO4J_USER", "neo4j")
# The password is never stored in the notebook: it is read from NEO4J_PASSWORD,
# or asked for interactively when that variable is not set.
PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

# Driver object shared by the Neo4j cells below.
driver = GraphDatabase.driver(URI, auth=(USER, PASSWORD))


In [10]:
# Load every data row of the CSV into memory as a list of string lists.
with open('ratings.csv', mode='r', encoding='utf-8') as f:
    reader = csv.reader(f)
    next(reader)  # skip the header line
    data = [record for record in reader]

print(f"{len(data)} lignes lues.")


100836 lignes lues.


In [11]:
# Number of CSV rows sent to Neo4j in a single query.
NEO4J_BATCH_SIZE = 1000

# One parameterised statement handles a whole batch: UNWIND turns the $rows
# list into individual rows, and MERGE creates the User node, the Movie node
# and the RATED relationship only when they do not exist yet.
query = """
UNWIND $rows AS row
MERGE (u:User {id: row.user_id})
MERGE (m:Movie {id: row.movie_id})
MERGE (u)-[r:RATED]->(m)
SET r.rating = row.rating, r.timestamp = row.timestamp
"""

with driver.session() as session:
    count = 0
    for start in range(0, len(data), NEO4J_BATCH_SIZE):
        # Convert the CSV strings of this slice to typed query parameters.
        chunk = [
            {
                "user_id": int(row[0]),
                "movie_id": int(row[1]),
                "rating": float(row[2]),
                "timestamp": int(row[3]),
            }
            for row in data[start:start + NEO4J_BATCH_SIZE]
        ]
        # consume() waits for the query to finish so failures surface here.
        session.run(query, rows=chunk).consume()
        count += len(chunk)

print(f"{count} relations RATED insérées.")


100836 relations RATED insérées.


In [12]:
# Cypher for a quick look at the loaded graph: five (user, rating, movie) triples.
sample_cypher = """
        MATCH (u:User)-[r:RATED]->(m:Movie)
        RETURN u.id AS userId, r.rating AS rating, m.id AS movieId
        LIMIT 5
    """

with driver.session() as session:
    result = session.run(sample_cypher)
    for record in result:
        print(record)


<Record userId=1 rating=4.0 movieId=1>
<Record userId=1 rating=4.0 movieId=3>
<Record userId=1 rating=4.0 movieId=6>
<Record userId=1 rating=5.0 movieId=47>
<Record userId=1 rating=5.0 movieId=50>


In [13]:
# Close the Neo4j driver now that the Neo4j cells are done.
driver.close()


In [None]:
################ Elasticsearch

In [14]:
pip install elasticsearch


Collecting elasticsearchNote: you may need to restart the kernel to use updated packages.

  Downloading elasticsearch-9.0.1-py3-none-any.whl.metadata (8.5 kB)
Collecting elastic-transport<9,>=8.15.1 (from elasticsearch)
  Downloading elastic_transport-8.17.1-py3-none-any.whl.metadata (3.8 kB)
Downloading elasticsearch-9.0.1-py3-none-any.whl (905 kB)
   ---------------------------------------- 0.0/905.5 kB ? eta -:--:--
   ----------- ---------------------------- 262.1/905.5 kB ? eta -:--:--
   ---------------------------------------- 905.5/905.5 kB 3.4 MB/s eta 0:00:00
Downloading elastic_transport-8.17.1-py3-none-any.whl (64 kB)
Installing collected packages: elastic-transport, elasticsearch
Successfully installed elastic-transport-8.17.1 elasticsearch-9.0.1


In [30]:
import csv
from elasticsearch import Elasticsearch, helpers
import pandas as pd


In [31]:
# Client for the local Elasticsearch node.
es = Elasticsearch("http://localhost:9200")

# Check: report whether the node answers a ping.
print(" Connexion Elasticsearch réussie" if es.ping() else " Connexion échouée")


 Connexion Elasticsearch réussie


In [32]:
# Reload the ratings file and show its first five rows.
df = pd.read_csv(filepath_or_buffer="ratings.csv")
df.head(n=5)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [33]:
def doc_generator(df):
    """Yield one Elasticsearch bulk action per row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``userId``, ``movieId``, ``rating`` and
        ``timestamp``.

    Yields
    ------
    dict
        An action for the ``ratings`` index with ``_index``, ``_id`` and
        ``_source`` keys. ``_id`` is ``"<userId>_<movieId>"`` built from the
        integer ids.
    """
    # itertuples() keeps each column's dtype. iterrows() would upcast a row that
    # mixes int and float columns to float, turning the id into "1.0_1.0".
    for document in df.itertuples(index=False):
        user_id = int(document.userId)
        movie_id = int(document.movieId)
        yield {
            "_index": "ratings",
            "_id": f"{user_id}_{movie_id}",
            "_source": {
                "userId": user_id,
                "movieId": movie_id,
                "rating": float(document.rating),
                "timestamp": int(document.timestamp),
            },
        }


In [34]:
# Send all rating documents through the bulk helper. It returns the number of
# successfully executed actions together with the collected errors, so the
# message reports what Elasticsearch actually accepted, not the size of df.
inserted, _errors = helpers.bulk(es, doc_generator(df))
print(f" Insertion terminée : {inserted} documents")


 Insertion terminée : 100836 documents


In [35]:
# Fetch five arbitrary documents from the index and print their stored fields.
search_params = {"index": "ratings", "query": {"match_all": {}}, "size": 5}
res = es.search(**search_params)

hits = res["hits"]["hits"]
for hit in hits:
    print(hit["_source"])


{'userId': 1, 'movieId': 1, 'rating': 4.0, 'timestamp': 964982703}
{'userId': 1, 'movieId': 3, 'rating': 4.0, 'timestamp': 964981247}
{'userId': 1, 'movieId': 6, 'rating': 4.0, 'timestamp': 964982224}
{'userId': 1, 'movieId': 47, 'rating': 5.0, 'timestamp': 964983815}
{'userId': 1, 'movieId': 50, 'rating': 5.0, 'timestamp': 964982931}


In [36]:
# Average rating per movie: a terms aggregation on movieId limited to 10
# buckets, each carrying an avg sub-aggregation over the rating field.
# "size": 0 suppresses the individual hits in the response.
avg_rating = {"note_moyenne": {"avg": {"field": "rating"}}}
movie_buckets = {"terms": {"field": "movieId", "size": 10}, "aggs": avg_rating}
query = {"size": 0, "aggs": {"moyenne_par_film": movie_buckets}}

res = es.search(index="ratings", body=query)
buckets = res["aggregations"]["moyenne_par_film"]["buckets"]
for bucket in buckets:
    print(f"Film {bucket['key']} - Moyenne : {bucket['note_moyenne']['value']:.2f}")


Film 356 - Moyenne : 4.18
Film 318 - Moyenne : 4.44
Film 296 - Moyenne : 4.19
Film 593 - Moyenne : 4.15
Film 2571 - Moyenne : 4.18
Film 260 - Moyenne : 4.23
Film 480 - Moyenne : 3.76
Film 110 - Moyenne : 4.06
Film 589 - Moyenne : 3.98
Film 527 - Moyenne : 4.23


In [38]:
# Average rating per user: a terms aggregation on userId limited to 5
# buckets, each carrying an avg sub-aggregation over the rating field.
# "size": 0 suppresses the individual hits in the response.
avg_rating = {"note_moyenne": {"avg": {"field": "rating"}}}
user_buckets = {"terms": {"field": "userId", "size": 5}, "aggs": avg_rating}
query = {"size": 0, "aggs": {"moyenne_par_user": user_buckets}}

res = es.search(index="ratings", body=query)
buckets = res["aggregations"]["moyenne_par_user"]["buckets"]
for bucket in buckets:
    print(f" Utilisateur {bucket['key']} – Moyenne : {bucket['note_moyenne']['value']:.2f}")


 Utilisateur 414 – Moyenne : 3.39
 Utilisateur 599 – Moyenne : 2.64
 Utilisateur 474 – Moyenne : 3.40
 Utilisateur 448 – Moyenne : 2.85
 Utilisateur 274 – Moyenne : 3.24


In [40]:
# Range query: up to five documents whose rating is strictly greater than 4.5.
high_rating_filter = {"range": {"rating": {"gt": 4.5}}}
res = es.search(index="ratings", query=high_rating_filter, size=5)

hits = res["hits"]["hits"]
for hit in hits:
    print(hit["_source"])


{'userId': 1, 'movieId': 47, 'rating': 5.0, 'timestamp': 964983815}
{'userId': 1, 'movieId': 50, 'rating': 5.0, 'timestamp': 964982931}
{'userId': 1, 'movieId': 101, 'rating': 5.0, 'timestamp': 964980868}
{'userId': 1, 'movieId': 151, 'rating': 5.0, 'timestamp': 964984041}
{'userId': 1, 'movieId': 157, 'rating': 5.0, 'timestamp': 964984100}


In [None]:
############## MongoDB

In [25]:
pip install pymongo


Collecting pymongo
  Obtaining dependency information for pymongo from https://files.pythonhosted.org/packages/45/bd/d6286b923e852dc080330182a8b57023555870d875b7523454ad1bdd1579/pymongo-4.13.0-cp311-cp311-win_amd64.whl.metadata
  Downloading pymongo-4.13.0-cp311-cp311-win_amd64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Obtaining dependency information for dnspython<3.0.0,>=1.16.0 from https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl.metadata
  Using cached dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.13.0-cp311-cp311-win_amd64.whl (848 kB)
   ---------------------------------------- 0.0/848.1 kB ? eta -:--:--
   ---------------------------------------- 10.2/848.1 kB ? eta -:--:--
   - ------------------------------------- 41.0/848.1 kB 487.6 kB/s eta 0:00:02
   ----- ---------------------------------- 122.9/848.1 kB 1.0 MB/s eta 0:00:01


In [26]:
from pymongo import MongoClient
import csv


In [27]:
# Client for the local MongoDB server, plus handles on the "moviesdb"
# database and its "ratings" collection.
client = MongoClient("mongodb://localhost:27017")
db = client.get_database("moviesdb")
ratings_collection = db.get_collection("ratings")


In [28]:
# Turn each CSV data row into a typed document; the first line is kept
# separately as `header` and is not converted.
documents = []
with open("ratings.csv", "r", encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader)
    documents.extend(
        {
            "userId": int(row[0]),
            "movieId": int(row[1]),
            "rating": float(row[2]),
            "timestamp": int(row[3]),
        }
        for row in reader
    )


In [29]:
# Insert all documents in one call, or say so when there is nothing to insert.
if not documents:
    print("Aucune donnée à insérer.")
else:
    result = ratings_collection.insert_many(documents)
    print(f"Documents insérés : {len(result.inserted_ids)}")


Documents insérés : 100836


In [30]:
# Print up to five documents from the collection as a quick check.
sample_cursor = ratings_collection.find(limit=5)
for doc in sample_cursor:
    print(doc)


{'_id': ObjectId('6833d910eb7d5b40dc80cbaa'), 'userId': 1, 'movieId': 1, 'rating': 4.0, 'timestamp': 964982703}
{'_id': ObjectId('6833d910eb7d5b40dc80cbab'), 'userId': 1, 'movieId': 3, 'rating': 4.0, 'timestamp': 964981247}
{'_id': ObjectId('6833d910eb7d5b40dc80cbac'), 'userId': 1, 'movieId': 6, 'rating': 4.0, 'timestamp': 964982224}
{'_id': ObjectId('6833d910eb7d5b40dc80cbad'), 'userId': 1, 'movieId': 47, 'rating': 5.0, 'timestamp': 964983815}
{'_id': ObjectId('6833d910eb7d5b40dc80cbae'), 'userId': 1, 'movieId': 50, 'rating': 5.0, 'timestamp': 964982931}


In [31]:
# Close the MongoDB client now that the MongoDB cells are done.
client.close()
