<a href="https://colab.research.google.com/github/sreent/data-management-intro/blob/main/Lectures/CM3010%20September%202023.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Section 1: (Q1) Linked Data (RDF + SPARQL)**

<details>
<summary>Click to expand the RDF + SPARQL demo</summary>

### 1.1 Install `rdflib` and Prepare a Minimal RDF Graph

In [None]:
!pip install rdflib --quiet

import rdflib
from rdflib.plugins.sparql import prepareQuery

# Minimal Turtle data for 'post' in English (similar to BabelNet/Lemon)
turtle_data = '''
@prefix bn: <http://babelnet.org/rdf/> .
@prefix lemon: <http://www.lemon-model.net/lemon#> .
@prefix lexinfo: <http://www.lexinfo.net/ontology/2.0/lexinfo#> .

bn:post_n_EN a lemon:LexicalEntry ;
    lemon:canonicalForm bn:post_n_EN_form ;
    lemon:language "EN" ;
    lexinfo:partOfSpeech lexinfo:noun .

bn:post_n_EN_form lemon:writtenRep "post" .
'''

In [None]:
# Parse the Turtle snippet into a fresh in-memory graph and report its size
g = rdflib.Graph()
g.parse(format="turtle", data=turtle_data)
triple_count = len(g)
print(f"RDF graph loaded with {triple_count} triples.")

### 1.2 Run SPARQL Queries

In [None]:
# Example 1: spelling and language of every lexical entry tagged as a noun.
# The entry is joined to its canonical form node, which holds the spelling.
query_all_nouns = prepareQuery('''
PREFIX lemon:   <http://www.lemon-model.net/lemon#>
PREFIX lexinfo: <http://www.lexinfo.net/ontology/2.0/lexinfo#>
SELECT ?writtenRep ?lang
WHERE {
  ?lexEntry a lemon:LexicalEntry ;
            lemon:canonicalForm ?form ;
            lemon:language ?lang ;
            lexinfo:partOfSpeech lexinfo:noun .
  ?form lemon:writtenRep ?writtenRep .
}
''')

print("All Nouns in the Graph:")
noun_rows = g.query(query_all_nouns)
# Each result row unpacks in SELECT order: (?writtenRep, ?lang)
for written_rep, lang in noun_rows:
    print(" - Noun:", written_rep, "| Language:", lang)


In [None]:
# Example 2: language and part of speech of every entry whose canonical
# form is spelled exactly "post"
query_post = prepareQuery('''
PREFIX lemon:   <http://www.lemon-model.net/lemon#>
PREFIX lexinfo: <http://www.lexinfo.net/ontology/2.0/lexinfo#>
SELECT ?language ?pos
WHERE {
  ?lexEntry a lemon:LexicalEntry ;
            lemon:canonicalForm ?form ;
            lemon:language ?language ;
            lexinfo:partOfSpeech ?pos .
  ?form lemon:writtenRep "post" .
}
''')

print("\nDetails for 'post':")
post_rows = g.query(query_post)
# Each result row unpacks in SELECT order: (?language, ?pos)
for language, pos in post_rows:
    print(" - Language:", language, "| POS:", pos)

## **Section 2: (Q2) Real Estate in MySQL**

<details>
<summary>Click to expand MySQL installation, table creation, data insertion, and queries</summary>

### 2.1 Install and Start MySQL on Colab

In [None]:
# Install MySQL server
# Refresh the package index, install the server without interactive prompts
# (install output is discarded), then start the service.
!apt-get -qq update
!DEBIAN_FRONTEND=noninteractive apt-get -y -qq install mysql-server > /dev/null
!service mysql start

# Create user & database
# No credentials are passed to the mysql client on these lines.
# NOTE(review): presumably this relies on the Colab shell user being accepted
# as the MySQL root account - confirm on other environments.
# IF NOT EXISTS keeps the CREATE statements safe to re-run.
!mysql -e "CREATE DATABASE IF NOT EXISTS RealEstateDB;"
!mysql -e "CREATE USER IF NOT EXISTS 'estateuser'@'localhost' IDENTIFIED BY 'estatepass';"
!mysql -e "GRANT ALL PRIVILEGES ON RealEstateDB.* TO 'estateuser'@'localhost';"
!mysql -e "FLUSH PRIVILEGES;"

# Printed unconditionally: the exit status of the commands above is not checked.
print("MySQL environment is set up. Database 'RealEstateDB' created.")

### 2.2 Install Python libs to connect to MySQL

In [None]:
# Install the SQL cell magic (ipython-sql) together with pinned versions of
# SQLAlchemy and the PyMySQL driver named in the connection URL below.
!pip install -q ipython-sql sqlalchemy==2.0.20 pymysql==1.1.0
# Load (or reload, when re-running) the extension that provides %sql / %%sql
%reload_ext sql

# Connect to RealEstateDB
# The user and password embedded in this URL are the ones created by the
# MySQL setup cell earlier in this notebook.
# NOTE(review): credentials are hardcoded; acceptable only for a throwaway local demo.
%sql mysql+pymysql://estateuser:estatepass@localhost/RealEstateDB

### 2.3 Create the Real Estate Tables

The tables below use natural keys (names and addresses) and **no numeric IDs**, on the assumption that this is how your exam diagram defines them:


In [None]:
%%sql
USE RealEstateDB;

-- Rebuild the schema from scratch. Tables that hold foreign keys are dropped
-- before the tables they reference, so no DROP is blocked by a constraint.
DROP TABLE IF EXISTS Views;
DROP TABLE IF EXISTS Offers;
DROP TABLE IF EXISTS Property;
DROP TABLE IF EXISTS Seller;
DROP TABLE IF EXISTS EstateAgent;
DROP TABLE IF EXISTS Buyer;

-- Seller, identified by name
CREATE TABLE Seller (
  Name        VARCHAR(100) PRIMARY KEY,
  Address     VARCHAR(200),
  PhoneNumber VARCHAR(50)
);

-- EstateAgent, identified by name (no other attributes)
CREATE TABLE EstateAgent (
  Name VARCHAR(100) PRIMARY KEY
);

-- Property, identified by its address. Every property must reference
-- exactly one seller and one estate agent (both columns are NOT NULL).
CREATE TABLE Property (
  Address     VARCHAR(200) PRIMARY KEY,
  Type        VARCHAR(50),
  Bedrooms    INT,
  AskingPrice DECIMAL(12,2),
  SellerName  VARCHAR(100) NOT NULL,
  AgentName   VARCHAR(100) NOT NULL,
  FOREIGN KEY (SellerName) REFERENCES Seller(Name),
  FOREIGN KEY (AgentName)  REFERENCES EstateAgent(Name)
);

-- Buyer, identified by name
CREATE TABLE Buyer (
  Name        VARCHAR(100) PRIMARY KEY,
  Address     VARCHAR(200),
  PhoneNumber VARCHAR(50)
);

-- Offers (composite PK)
-- The key is property + buyer + date, so a buyer can make at most one
-- offer on a given property per day.
CREATE TABLE Offers (
  OfferDate DATE,
  OfferStatus VARCHAR(50),
  OfferValue DECIMAL(12,2),
  PropertyAddress VARCHAR(200) NOT NULL,
  BuyerName VARCHAR(100) NOT NULL,
  PRIMARY KEY (PropertyAddress, BuyerName, OfferDate),
  FOREIGN KEY (PropertyAddress) REFERENCES Property(Address),
  FOREIGN KEY (BuyerName)       REFERENCES Buyer(Name)
);

-- Views (composite PK)
-- The key is property + buyer + date, so a buyer can be recorded as
-- viewing a given property at most once per day.
CREATE TABLE Views (
  ViewDate DATE,
  PropertyAddress VARCHAR(200) NOT NULL,
  BuyerName VARCHAR(100) NOT NULL,
  PRIMARY KEY (PropertyAddress, BuyerName, ViewDate),
  FOREIGN KEY (PropertyAddress) REFERENCES Property(Address),
  FOREIGN KEY (BuyerName)       REFERENCES Buyer(Name)
);

### 2.4 Insert Sample Data

In [None]:
%%sql
USE RealEstateDB;

-- Empty all tables before loading, so that re-running this cell does not
-- fail on duplicate primary keys. Tables holding foreign keys are cleared
-- before the tables they reference.
DELETE FROM Views;
DELETE FROM Offers;
DELETE FROM Property;
DELETE FROM Seller;
DELETE FROM EstateAgent;
DELETE FROM Buyer;

-- Every INSERT names its columns explicitly, so the data stays correct even
-- if the column order in the CREATE TABLE statements is ever rearranged.

-- Parent rows first (sellers and agents are referenced by Property)
INSERT INTO Seller (Name, Address, PhoneNumber) VALUES
('Alice Seller','1 Seller St','555-111'),
('Bob Seller','2 Seller Rd','555-222');

INSERT INTO EstateAgent (Name) VALUES
('AgentGrace'),
('AgentHeidi');

INSERT INTO Property (Address, Type, Bedrooms, AskingPrice, SellerName, AgentName) VALUES
('10 Main Street','Flat',2,250000,'Alice Seller','AgentGrace'),
('20 Baker Avenue','Terraced House',3,350000,'Bob Seller','AgentHeidi');

INSERT INTO Buyer (Name, Address, PhoneNumber) VALUES
('Charlie Buyer','99 Buyer Rd','555-333'),
('Doris Buyer','100 Buyer Ln','555-444');

-- Child rows last (offers and viewings reference Property and Buyer)
INSERT INTO Offers (OfferDate, OfferStatus, OfferValue, PropertyAddress, BuyerName) VALUES
('2023-01-05','sale completed',240000,'10 Main Street','Charlie Buyer'),
('2023-01-10','rejected',230000,'10 Main Street','Doris Buyer'),
('2023-02-01','sale completed',340000,'20 Baker Avenue','Doris Buyer');

INSERT INTO Views (ViewDate, PropertyAddress, BuyerName) VALUES
('2023-01-03','10 Main Street','Charlie Buyer'),
('2023-01-04','10 Main Street','Doris Buyer'),
('2023-01-20','20 Baker Avenue','Doris Buyer');

SELECT 'Data inserted into RealEstateDB' AS info;

### 2.5 Commission Calculation Queries

**(i)** Commission per agent (1% of the value of each completed sale) for sales since 1 Jan 2023 (Q2e):

In [None]:
%%sql
USE RealEstateDB;

-- Commission per estate agent, taken as one percent of the value of every
-- completed sale dated on or after 1 Jan 2023. Each offer is matched to its
-- property to find the agent responsible for it.
SELECT prop.AgentName AS EstateAgent,
       SUM(sale.OfferValue * 0.01) AS TotalCommission
FROM Offers AS sale
INNER JOIN Property AS prop
        ON prop.Address = sale.PropertyAddress
WHERE sale.OfferDate >= '2023-01-01'
  AND sale.OfferStatus = 'sale completed'
GROUP BY prop.AgentName;

**(ii)** Top-earning agent:

In [None]:
%%sql
USE RealEstateDB;

-- Highest-earning estate agent since 1 Jan 2023. Commission is one percent
-- of the value of each completed sale, summed per agent.
SELECT p.AgentName AS EstateAgent,
       SUM(o.OfferValue * 0.01) AS TotalCommission
FROM Property p
JOIN Offers o ON p.Address = o.PropertyAddress
WHERE o.OfferStatus = 'sale completed'
  AND o.OfferDate >= '2023-01-01'
GROUP BY p.AgentName
-- Secondary sort on the agent name makes the single returned row
-- deterministic when two agents earn exactly the same commission.
ORDER BY TotalCommission DESC,
         p.AgentName ASC
LIMIT 1;

## **Section 3: (Q3) IR/Document DB (MongoDB)**

Below we’ll **install MongoDB** in Colab, start it up, create a client, then load a small “books” collection (like the IR question about language detection, searching for “Strudel,” etc.).

<details>
<summary>Click to see full MongoDB setup + queries</summary>

### 3.1 Install and Launch MongoDB on Colab

In [None]:
# Install MongoDB dependencies in Colab
# Download the libssl1.1 package from the Ubuntu archive and install it with dpkg.
# NOTE(review): presumably needed because MongoDB 4.4 links against libssl1.1,
# which newer Ubuntu images no longer ship - confirm.
!sudo wget http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb
!sudo dpkg -i libssl1.1_1.1.1f-1ubuntu2_amd64.deb

# Add MongoDB repository for Ubuntu bionic
# Register the 4.4 signing key, write the repository entry to its own
# sources.list.d file, then refresh the index and install (stdout discarded).
!wget -qO - https://www.mongodb.org/static/pgp/server-4.4.asc | apt-key add -
!echo "deb [ arch=amd64,arm64 ] http://repo.mongodb.org/apt/ubuntu bionic/mongodb-org/4.4 multiverse" | tee /etc/apt/sources.list.d/mongodb-org-4.4.list
!apt-get update > /dev/null
!apt-get install -y mongodb-org > /dev/null

# Install pymongo to interact with MongoDB
!pip install -q pymongo

# Create data folder, start mongod as a background process
# --fork detaches the server; its log goes to /var/log/mongodb.log and its
# data files to /data/db.
!mkdir -p /data/db
!mongod --fork --logpath /var/log/mongodb.log --dbpath /data/db

# Printed unconditionally: the commands above are not checked for failure.
print("MongoDB is installed and running in the background.")

### 3.2 Connect with PyMongo and Insert Sample “Books”

In [None]:
from pymongo import MongoClient
import pprint

# Open a client against the mongod instance listening on localhost:27017
client = MongoClient(host='localhost', port=27017)
print("Connected to MongoDB. List of databases now:", client.list_database_names())

# Database 'booksdb' and its 'books' collection (MongoDB creates both lazily,
# on first write)
db = client.booksdb
collection = db.books

# Start from an empty collection so the cell can be re-run safely
collection.delete_many({})

# Sample books for the IR question scenario: a mix of languages, centuries,
# and spellings of "Strudel" (including none at all)
book_fields = ("title", "lang", "year", "text")
book_rows = [
    ("Book1", "German",  1850, "Ein Wort Strudel..."),
    ("Book2", "German",  1905, "Keine Erwähnung"),
    ("Book3", "English", 1845, "Something about strudel."),
    ("Book4", "German",  1830, "No mention of Strudel"),
    ("Book5", "German",  1880, "STRUDEL mania, so tasty!"),
]
# zip keeps the field order, so each document is title, lang, year, text
books_data = [dict(zip(book_fields, row)) for row in book_rows]

collection.insert_many(books_data)
document_count = collection.count_documents({})
print("Inserted sample book docs. Now we have", document_count, "documents.")

### 3.3 Queries: “German,” “19th century,” “contains ‘Strudel’”

**(a) All German books**:

In [None]:
query = """
db.books.find({
  "lang": "German"
}).pretty();
"""

!mongo --quiet --eval '{query}'


**(b) 19th century (1800 ≤ year < 1900) AND `lang=German`**:

In [None]:
query = """
db.books.find({
    "lang": "German",
    "year": {"$gte": 1800, "$lt": 1900}
}).pretty();
"""

!mongo --quiet --eval '{query}'


**(c) 19th century German books containing “Strudel”**:

We can do a naive text search with a regex:

In [None]:
query = """
db.books.find({
    "lang": "German",
    "year": {"$gte": 1800, "$lt": 1900},
    "text": {"$regex": "strudel", "$options": "i"}
}).pretty();
"""

!mongo --quiet --eval '{query}'