## **Ferry Data Transfer**

**Get to Know the Dataset**

In [None]:
import pandas as pd
# Read the source CSV into a DataFrame; later cells refer to it as `data`.
data = pd.read_csv(filepath_or_buffer='data.csv')


In [None]:
# Report how many rows the DataFrame `data` (loaded from data.csv) contains.
print(f"Number of samples: {len(data)}")


### **DuckDB To DuckDB**

**Create DuckDB storage**

In [None]:
import duckdb
import pandas as pd

# `data` is the pandas DataFrame loaded from 'data.csv' in the first cell; the
# SQL below refers to it by its Python variable name, so that cell must have
# been run before this one.
#
# The `with` block closes the connection even if a query raises, so a failed
# run does not leave stroke.duckdb held open by the kernel.
with duckdb.connect("stroke.duckdb") as con:
    # Store the DataFrame as table `stroke`. IF NOT EXISTS keeps the cell
    # re-runnable, but it also means an already existing table is left as is.
    con.execute("CREATE TABLE IF NOT EXISTS stroke AS SELECT * FROM data")

    # Fetch and display some data
    result = con.execute("SELECT * FROM stroke LIMIT 5").fetchdf()
    print(result)

    # Count the rows stored in the table
    count = con.execute("SELECT COUNT(*) FROM stroke").fetchdf()
    print("Count of samples",count)


### **Check the destination DuckDB database**

In [None]:
import duckdb

# Open the destination database. The `with` block guarantees the connection is
# closed even when the schema or table is missing and one of the queries raises.
with duckdb.connect(database='destination1_duckdb.duckdb') as conn_dest_duckdb:  # Persistent DB
    # Select the schema so the table can be referenced without a prefix
    conn_dest_duckdb.execute("SET search_path TO my_dataset;")

    # Query first 5 samples
    query = "SELECT * FROM my_output_table LIMIT 5;"
    result = conn_dest_duckdb.execute(query).fetchall()

    # Count total number of samples: take the first column of the single
    # row returned by COUNT(*)
    count_samples_in_dest_duckdb = conn_dest_duckdb.execute(
        "SELECT COUNT(*) FROM my_output_table"
    ).fetchone()[0]

# Print results (rows were fetched above, so the connection is no longer needed)
print(f"Total count of samples in destination DuckDB: {count_samples_in_dest_duckdb}")
print("First 5 samples in destination DuckDB:")
for row in result:
    print(row)


## **PostgreSQL Database**

### **Check the Source PostgreSQL Database**

In [None]:
import os

from sqlalchemy import create_engine
import pandas as pd

# PostgreSQL connection URI. Set the STROKE_PG_URI environment variable to
# supply real credentials without writing them into the notebook; the fallback
# is the local development URI this notebook has always used.
DB_URI = os.environ.get(
    "STROKE_PG_URI",
    "postgresql://postgres:1234@127.0.0.1:5432/my_database",
)

# Create SQLAlchemy Engine
engine = create_engine(DB_URI)

# The connection is released when the `with` block exits
with engine.connect() as conn:
    # Fetch first 5 samples
    df_samples = pd.read_sql("SELECT * FROM stroke LIMIT 5;", conn)
    print("First 5 samples:")
    print(df_samples)

    # Get total count of samples
    count_df = pd.read_sql("SELECT COUNT(*) AS total_samples FROM stroke;", conn)
    # The result has a single row; take it by position rather than by index label
    total_samples = count_df["total_samples"].iloc[0]
    print(f"Total count of samples in PostgreSQL: {total_samples}")


### **Check the Destination DuckDB Loaded from PostgreSQL**

In [None]:
import duckdb

# Open the DuckDB file that received the PostgreSQL data. The `with` block
# guarantees the connection is closed even when the schema or table is missing
# and one of the queries raises.
with duckdb.connect(database='stroke_postgres_duckdb.duckdb') as conn_dest__postgresql_duckdb:  # Persistent DB
    # Select the schema so the table can be referenced without a prefix
    conn_dest__postgresql_duckdb.execute("SET search_path TO postgres_to_duckdb_dataset;")

    # Query first 5 samples
    query = "SELECT * FROM my_table LIMIT 5;"
    result = conn_dest__postgresql_duckdb.execute(query).fetchall()

    # Count total number of samples: take the first column of the single
    # row returned by COUNT(*)
    count_samples_in_dest_duckdb = conn_dest__postgresql_duckdb.execute(
        "SELECT COUNT(*) FROM my_table"
    ).fetchone()[0]

# Print results (rows were fetched above, so the connection is no longer needed)
print(f"Total count of samples in destination DuckDB: {count_samples_in_dest_duckdb}")
print("First 5 samples in destination DuckDB:")
for row in result:
    print(row)


## **Conclusion**
Data was transferred from:
- DuckDB --> DuckDB
- PostgreSQL --> DuckDB
