In [None]:
import pandas as pd
import numpy as np
from py2neo import Graph, Node, Relationship

# Simulate a loan-application dataset (Italian column names and values).
# The legacy global NumPy RNG is seeded so every run draws the same sample.
np.random.seed(42)
n_samples = 500

# Vocabularies the categorical columns are sampled from.
occupations = ['Funzionario', 'Impiegato', 'Operaio', 'Dirigente']
provinces = ['Milano', 'Roma', 'Napoli', 'Torino',
             'Firenze', 'Bologna', 'Genova', 'Venezia']
regions = ['Lombardia', 'Lazio', 'Campania', 'Piemonte',
           'Toscana', 'Emilia-Romagna', 'Liguria', 'Veneto']
motivi_prestito = ['Acquisto Casa', 'Ristrutturazione',
                   'Auto', 'Viaggio', 'Consolidamento Debiti']

# Columns are filled one at a time, in a fixed order: every call advances the
# shared RNG, so reordering these lines would change the generated values.
data = {}
data['IMPORTO_RICHIESTO'] = np.random.randint(1000, 15001, n_samples)
data['TIPO DI OCCUPAZIONE'] = np.random.choice(occupations, n_samples)
data['PROVINCIA'] = np.random.choice(provinces, n_samples)
data['CONSENSO_DATI_PRIVACY'] = np.random.choice(['si', 'no'], n_samples)
data['CONSENSO_DATI_MRKTG'] = np.random.choice(['si', 'no'], n_samples)
data['CONSENSO_DATI_CESSIONE_TERZI'] = np.random.choice(['si', 'no'], n_samples)
data['SESSO'] = np.random.choice(['M', 'F'], n_samples)
data['REGIONE'] = np.random.choice(regions, n_samples)
data['IMPORTO_STIPENDIO_PENSIONE'] = np.random.randint(1500, 5001, n_samples)
data['anni_lavorativi_Category'] = np.random.randint(1, 41, n_samples)
data['TIPO_AZIENDA'] = np.random.choice(['Privata', 'Pubblica'], n_samples)
data['TEMPO_INDETERMINATO'] = np.random.choice(['si', 'no'], n_samples)
# The target column is added last; later code relies on it being the final column.
data['MOTIVO_DEL_PRESTITO'] = np.random.choice(motivi_prestito, n_samples)

df = pd.DataFrame(data)

# Introduce some correlation between the features and the target.
# The rules run in sequence, so a later rule overrides an earlier one on rows
# that satisfy both (the 'Privata' rule therefore always wins).
for mask, motivo in (
    (df['IMPORTO_RICHIESTO'] > 10000, 'Acquisto Casa'),
    (df['IMPORTO_STIPENDIO_PENSIONE'] < 2000, 'Consolidamento Debiti'),
    (df['TIPO_AZIENDA'] == 'Privata', 'Auto'),
):
    df.loc[mask, 'MOTIVO_DEL_PRESTITO'] = motivo

# Open a Bolt connection to the local Neo4j instance.
graph = Graph("bolt://localhost:7690", auth=("neo4j", "onekenoby"))

# Clear existing data so the upload starts from an empty graph.
graph.delete_all()

# Every column except the last one (MOTIVO_DEL_PRESTITO, the target) is a feature.
feature_columns = df.columns[:-1]

# For each application, merge its Motivo node, then merge one Feature node per
# column value and link it to that Motivo node.
for _, record in df.iterrows():
    target = Node("Motivo", name=record['MOTIVO_DEL_PRESTITO'])
    graph.merge(target, "Motivo", "name")

    for feature in feature_columns:
        # Feature nodes are named "<column>_<value>" and merged on that name.
        source = Node("Feature", name=f"{feature}_{record[feature]}")
        graph.merge(source, "Feature", "name")
        graph.merge(Relationship(source, "CORRELATES_WITH", target))

print("Data successfully uploaded to Neo4j!")

In [None]:
from neo4j import GraphDatabase

# Neo4j connection settings.
# Each value can be overridden through an environment variable so that real
# credentials do not have to live in the notebook; the literals below are only
# the local-development fallbacks and keep the previous behaviour when the
# variables are unset.
import os

# Bolt endpoint of the local instance used by this notebook (port 7690 here,
# not Neo4j's stock Bolt port).
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7690")
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "onekenoby")

# Function to test the connection


def test_neo4j_connection(uri, user, password):
    try:
        # Create a driver instance
        driver = GraphDatabase.driver(uri, auth=(user, password))

        # Open a session and run a simple query
        with driver.session() as session:
            result = session.run("RETURN 'Connection Successful' AS message")
            for record in result:
                print(record["message"])

        # Close the driver
        driver.close()
        print("Connection to Neo4j was successful!")

    except Exception as e:
        print("Failed to connect to Neo4j:", e)


# Run the connectivity check with the settings defined earlier in this cell.
test_neo4j_connection(uri=uri, user=user, password=password)

In [None]:
import pandas as pd
import numpy as np
from py2neo import Graph, Node, Relationship

# Simulate a loan-application dataset (English column names).
# The legacy global NumPy RNG is seeded so every run draws the same sample.
np.random.seed(42)
n_samples = 5000

# Vocabularies the categorical columns are sampled from.
occupations = ['Official', 'Employee', 'Worker', 'Manager']
provinces = ['Milan', 'Rome', 'Naples', 'Turin',
             'Florence', 'Bologna', 'Genoa', 'Venice']
regions = ['Lombardy', 'Lazio', 'Campania', 'Piedmont',
           'Tuscany', 'Emilia-Romagna', 'Liguria', 'Veneto']
reasons_loan = ['Home Purchase', 'Renovation',
                'Car', 'Travel', 'Debt Consolidation']

data = {
    # Amounts are drawn from 1,000 to 15,000 so that the "> 10000" rule below
    # only rewrites part of the rows. The previous range (10,000 to 1,500,000)
    # put virtually every row above the threshold, which erased the randomly
    # drawn reasons.
    'AMOUNT_REQUESTED': np.random.randint(1000, 15001, n_samples),
    'TYPE OF OCCUPATION': np.random.choice(occupations, n_samples),
    'PROVINCE': np.random.choice(provinces, n_samples),
    'CONSENT_DATA_PRIVACY': np.random.choice(['si', 'no'], n_samples),
    'CONSENT_DATA_MRKTG': np.random.choice(['si', 'no'], n_samples),
    'CONSENT_DATA_ASSIGNMENT_TO_THIRD_PARTIES': np.random.choice(['si', 'no'], n_samples),
    'SEX': np.random.choice(['M', 'F'], n_samples),
    'REGION': np.random.choice(regions, n_samples),
    'SALARY_PENSION_AMOUNT': np.random.randint(1500, 5001, n_samples),
    'WORKING_YEARS_CATEGORY': np.random.randint(1, 41, n_samples),
    'COMPANY_TYPE': np.random.choice(['Private', 'Public'], n_samples),
    'PERMANENT': np.random.choice(['si', 'no'], n_samples),
    # Target column; later code relies on it being the last column.
    'REASON_FOR_LOAN': np.random.choice(reasons_loan, n_samples)
}

df = pd.DataFrame(data)

# Introduce some correlation between variables and the target.
# The rules run in sequence, so a later rule overrides an earlier one on rows
# that satisfy both.
df.loc[df['AMOUNT_REQUESTED'] > 10000, 'REASON_FOR_LOAN'] = 'Home Purchase'
df.loc[df['SALARY_PENSION_AMOUNT'] < 2000, 'REASON_FOR_LOAN'] = 'Debt Consolidation'
df.loc[df['COMPANY_TYPE'] == 'Private', 'REASON_FOR_LOAN'] = 'Car'

# Open a Bolt connection to the local Neo4j instance.
graph = Graph("bolt://localhost:7690", auth=("neo4j", "onekenoby"))

# Clear existing data before uploading.
graph.delete_all()


def _merged_node(label, name):
    """Build a node with the given label and name and merge it on its name."""
    node = Node(label, name=name)
    graph.merge(node, label, "name")
    return node


# One "Motivo" node per loan reason, one "Feature" node per "<column>_<value>",
# and a CORRELATES_WITH relationship from each feature of a row to its reason.
for _, record in df.iterrows():
    motivo_node = _merged_node("Motivo", record['REASON_FOR_LOAN'])

    # The last column is REASON_FOR_LOAN itself, so it is left out here.
    for column in df.columns[:-1]:
        feature_node = _merged_node("Feature", f"{column}_{record[column]}")
        graph.merge(Relationship(feature_node, "CORRELATES_WITH", motivo_node))

print("Data successfully uploaded to Neo4j!")

In [None]:
import pandas as pd
import numpy as np

# Simulate the loan dataset and write it to disk.
# The legacy global NumPy RNG is seeded so every run draws the same sample.
np.random.seed(42)
n_samples = 1000

# Vocabularies the categorical columns are sampled from.
occupations = ['Official', 'Employee', 'Worker', 'Manager']
provinces = ['Milan', 'Rome', 'Naples', 'Turin',
             'Florence', 'Bologna', 'Genoa', 'Venice']
regions = ['Lombardy', 'Lazio', 'Campania', 'Piedmont',
           'Tuscany', 'Emilia-Romagna', 'Liguria', 'Veneto']
reasons_loan = ['Home Purchase', 'Renovation',
                'Car', 'Travel', 'Debt Consolidation']


def _pick(options):
    """Draw n_samples random entries from options."""
    return np.random.choice(options, n_samples)


def _randint_inclusive(low, high):
    """Draw n_samples random integers between low and high, both included."""
    return np.random.randint(low, high + 1, n_samples)


# The dict entries are evaluated top to bottom, and each draw advances the
# shared RNG, so their order determines the generated values.
data = {
    'AMOUNT_REQUESTED': _randint_inclusive(1000, 15000),
    'TYPE OF OCCUPATION': _pick(occupations),
    'PROVINCE': _pick(provinces),
    'CONSENT_DATA_PRIVACY': _pick(['si', 'no']),
    'CONSENT_DATA_MRKTG': _pick(['si', 'no']),
    'CONSENT_DATA_ASSIGNMENT_TO_THIRD_PARTIES': _pick(['si', 'no']),
    'SEX': _pick(['M', 'F']),
    'REGION': _pick(regions),
    'SALARY_PENSION_AMOUNT': _randint_inclusive(1500, 5000),
    'WORKING_YEARS_CATEGORY': _randint_inclusive(1, 40),
    'COMPANY_TYPE': _pick(['Privata', 'Pubblica']),
    'PERMANENT': _pick(['si', 'no']),
    'REASON_FOR_LOAN': _pick(reasons_loan),
}

df = pd.DataFrame(data)

# Persist the frame as CSV, without the index column.
df.to_csv("loan_data.csv", index=False)
print("CSV file 'loan_data.csv' has been saved.")

In [None]:
import pandas as pd
import numpy as np
from py2neo import Graph, Node, Relationship

# Simulate the loan dataset, save the raw draw, then add target correlations.
# The legacy global NumPy RNG is seeded so every run draws the same sample.
np.random.seed(42)
n_samples = 1000

# Vocabularies the categorical columns are sampled from.
occupations = ['Official', 'Employee', 'Worker', 'Manager']
provinces = ['Milan', 'Rome', 'Naples', 'Turin',
             'Florence', 'Bologna', 'Genoa', 'Venice']
regions = ['Lombardy', 'Lazio', 'Campania', 'Piedmont',
           'Tuscany', 'Emilia-Romagna', 'Liguria', 'Veneto']
reasons_loan = ['Home Purchase', 'Renovation',
                'Car', 'Travel', 'Debt Consolidation']

# The dict entries are evaluated top to bottom, and each draw advances the
# shared RNG, so their order determines the generated values.
data = {
    'AMOUNT_REQUESTED': np.random.randint(1000, 15001, size=n_samples),
    'TYPE OF OCCUPATION': np.random.choice(occupations, size=n_samples),
    'PROVINCE': np.random.choice(provinces, size=n_samples),
    'CONSENT_DATA_PRIVACY': np.random.choice(['si', 'no'], size=n_samples),
    'CONSENT_DATA_MRKTG': np.random.choice(['si', 'no'], size=n_samples),
    'CONSENT_DATA_ASSIGNMENT_TO_THIRD_PARTIES': np.random.choice(['si', 'no'], size=n_samples),
    'SEX': np.random.choice(['M', 'F'], size=n_samples),
    'REGION': np.random.choice(regions, size=n_samples),
    'SALARY_PENSION_AMOUNT': np.random.randint(1500, 5001, size=n_samples),
    'WORKING_YEARS_CATEGORY': np.random.randint(1, 41, size=n_samples),
    'COMPANY_TYPE': np.random.choice(['Privata', 'Pubblica'], size=n_samples),
    'PERMANENT': np.random.choice(['si', 'no'], size=n_samples),
    'REASON_FOR_LOAN': np.random.choice(reasons_loan, size=n_samples),
}

df = pd.DataFrame(data)

# The CSV is written before the correlation rules below run, so the file holds
# the unmodified random draw.
df.to_csv("loan_data.csv", index=False)
print("CSV file 'loan_data.csv' has been saved.")

# Introduce some correlation between variables and the target.
# The masks depend only on feature columns, so they can be computed up front;
# the assignments still run in the same order, later rules overriding earlier ones.
is_large_request = df['AMOUNT_REQUESTED'].gt(10000)
is_low_income = df['SALARY_PENSION_AMOUNT'].lt(2000)
is_private_company = df['COMPANY_TYPE'].eq('Privata')

df.loc[is_large_request, 'REASON_FOR_LOAN'] = 'Home Purchase'
df.loc[is_low_income, 'REASON_FOR_LOAN'] = 'Debt Consolidation'
df.loc[is_private_company, 'REASON_FOR_LOAN'] = 'Car'

# Open a Bolt connection to the local Neo4j instance.
graph = Graph("bolt://localhost:7687", auth=("neo4j", "onekenoby"))

# Clear existing data before uploading.
graph.delete_all()

# Build the graph: one "Reason" node per loan reason, one "Feature" node per
# "<column>_<value>", and a CORRELATES_WITH relationship between them.
for _, loan in df.iterrows():
    Reason_node = Node("Reason", name=loan['REASON_FOR_LOAN'])
    graph.merge(Reason_node, "Reason", "name")

    # All columns but the last (REASON_FOR_LOAN) contribute a feature name.
    feature_names = [f"{column}_{loan[column]}" for column in df.columns[:-1]]

    for feature_name in feature_names:
        feature_node = Node("Feature", name=feature_name)
        graph.merge(feature_node, "Feature", "name")
        graph.merge(Relationship(feature_node, "CORRELATES_WITH", Reason_node))

print("Data successfully uploaded to Neo4j!")

In [None]:
import pandas as pd
import numpy as np
from py2neo import Graph, Node, Relationship

# Simulate the loan dataset, save the raw draw, then add target correlations.
# The legacy global NumPy RNG is seeded so every run draws the same sample.
np.random.seed(42)
n_samples = 1000

# Vocabularies the categorical columns are sampled from.
occupations = ['Official', 'Employee', 'Worker', 'Manager']
provinces = ['Milan', 'Rome', 'Naples', 'Turin',
             'Florence', 'Bologna', 'Genoa', 'Venice']
regions = ['Lombardy', 'Lazio', 'Campania', 'Piedmont',
           'Tuscany', 'Emilia-Romagna', 'Liguria', 'Veneto']
reasons_loan = ['Home Purchase', 'Renovation',
                'Car', 'Travel', 'Debt Consolidation']

data = {
    'AMOUNT_REQUESTED': np.random.randint(1000, 15001, n_samples),
    'TYPE OF OCCUPATION': np.random.choice(occupations, n_samples),
    'PROVINCE': np.random.choice(provinces, n_samples),
    'CONSENT_DATA_PRIVACY': np.random.choice(['si', 'no'], n_samples),
    'CONSENT_DATA_MRKTG': np.random.choice(['si', 'no'], n_samples),
    'CONSENT_DATA_ASSIGNMENT_TO_THIRD_PARTIES': np.random.choice(['si', 'no'], n_samples),
    'SEX': np.random.choice(['M', 'F'], n_samples),
    'REGION': np.random.choice(regions, n_samples),
    'SALARY_PENSION_AMOUNT': np.random.randint(1500, 5001, n_samples),
    'WORKING_YEARS_CATEGORY': np.random.randint(1, 41, n_samples),
    # 'Public' was previously misspelled 'Pubblic'; the label ends up in the
    # CSV and in the Feature node names, so the spelling matters.
    'COMPANY_TYPE': np.random.choice(['Private', 'Public'], n_samples),
    'PERMANENT': np.random.choice(['si', 'no'], n_samples),
    # Target column; later code relies on it being the last column.
    'REASON_FOR_LOAN': np.random.choice(reasons_loan, n_samples)
}

df = pd.DataFrame(data)

# The CSV is written before the correlation rules below run, so the file holds
# the unmodified random draw.
df.to_csv("loan_data.csv", index=False)
print("CSV file 'loan_data.csv' has been saved.")

# Introduce some correlation between variables and the target.
# The rules run in sequence, so a later rule overrides an earlier one on rows
# that satisfy both.
df.loc[df['AMOUNT_REQUESTED'] > 10000, 'REASON_FOR_LOAN'] = 'Home Purchase'
df.loc[df['SALARY_PENSION_AMOUNT'] < 2000,
       'REASON_FOR_LOAN'] = 'Debt Consolidation'
df.loc[df['COMPANY_TYPE'] == 'Private', 'REASON_FOR_LOAN'] = 'Car'

# Connect to the Neo4j database (specify the "loandata" database)
try:
    graph = Graph("bolt://127.0.0.1:7687", auth=("neo4j", "onekenoby"), name="loandata")
    # Run a trivial query so connection or database problems surface here.
    graph.run("RETURN 1")
    print("Connection successful!")
except Exception as e:
    print(f"Connection failed: {e}")
    # Stop here: continuing would either fail on an undefined `graph` or act
    # on a `graph` left over from an earlier cell (a different database).
    raise

# Clear existing data in the specified database.
# This runs only after a successful connection; it used to sit inside the
# except branch, where it ran on failure and never on success.
graph.delete_all()

# Create nodes and relationships
for i, row in df.iterrows():
    # One "Reason" node per distinct loan reason, merged on its name.
    Reason_node = Node("Reason", name=row['REASON_FOR_LOAN'])
    graph.merge(Reason_node, "Reason", "name")

    for column in df.columns[:-1]:  # Exclude REASON_FOR_LOAN from iteration
        # Feature nodes are named "<column>_<value>" and merged on that name.
        feature_node = Node("Feature", name=f"{column}_{row[column]}")
        graph.merge(feature_node, "Feature", "name")

        rel = Relationship(feature_node, "CORRELATES_WITH", Reason_node)
        graph.merge(rel)

print("Data successfully uploaded to the 'loandata' Neo4j database!")

In [None]:
import pandas as pd
import numpy as np
from py2neo import Graph, Node, Relationship

# Simulate CRM dataset
# The legacy global NumPy RNG is seeded so every run draws the same sample.
np.random.seed(42)
n_customers = 1000

# Define categories and attributes
industries = ['Retail', 'Finance', 'Healthcare', 'Technology', 'Education']
regions = ['North America', 'Europe', 'Asia', 'Australia', 'South America']
customer_tiers = ['Bronze', 'Silver', 'Gold', 'Platinum']
interactions = ['Email', 'Call', 'Meeting', 'Social Media', 'Event']
outcomes = ['Positive', 'Neutral', 'Negative']

# Generate random data (one row per customer, ids running from 1 to n_customers)
data = {
    'CUSTOMER_ID': range(1, n_customers + 1),
    'NAME': [f"Customer_{i}" for i in range(1, n_customers + 1)],
    'INDUSTRY': np.random.choice(industries, n_customers),
    'REGION': np.random.choice(regions, n_customers),
    'TIER': np.random.choice(customer_tiers, n_customers),
    'LAST_INTERACTION_TYPE': np.random.choice(interactions, n_customers),
    'LAST_INTERACTION_OUTCOME': np.random.choice(outcomes, n_customers),
    'TOTAL_PURCHASES': np.random.randint(1, 101, n_customers),
    'REVENUE_GENERATED': np.random.randint(1000, 100001, n_customers)
}

df = pd.DataFrame(data)

# Save to CSV. This happens before the correlation rules below run, so the
# file holds the unmodified random draw.
df.to_csv("crm_data.csv", index=False)
print("CSV file 'crm_data.csv' has been saved.")

# Introduce some correlations in the data
# Scaling by 1.5 produces fractional values, so the integer revenue column is
# converted to float first. Writing floats into an integer column through
# .loc relies on a silent upcast that pandas warns about.
df['REVENUE_GENERATED'] = df['REVENUE_GENERATED'].astype(float)
is_platinum = df['TIER'] == 'Platinum'
df.loc[is_platinum, 'REVENUE_GENERATED'] = df.loc[is_platinum, 'REVENUE_GENERATED'] * 1.5

df.loc[df['INDUSTRY'] == 'Finance', 'LAST_INTERACTION_OUTCOME'] = 'Positive'

is_asia = df['REGION'] == 'Asia'
df.loc[is_asia, 'TOTAL_PURCHASES'] = df.loc[is_asia, 'TOTAL_PURCHASES'] + 10

# Connect to Neo4j (default database) to confirm the server is reachable
try:
    graph = Graph("bolt://127.0.0.1:7687", auth=("neo4j", "onekenoby"))
    print("Connection successful!")
except Exception as e:
    print(f"Connection failed: {e}")
    # Re-raise so the cell stops here; exit() is not a reliable way to abort
    # a notebook cell.
    raise

# Check or create the "mycrm" database (this needs to be done manually outside this script if not supported)
try:
    # Attempt to switch to "mycrm" database.
    # The bolt:// scheme must target the Bolt port (7687, as in the connection
    # above); 7474 is Neo4j's HTTP port and the connection could not be opened.
    graph = Graph("bolt://127.0.0.1:7687", auth=("neo4j", "onekenoby"), name="mycrm")
    # Run a trivial query so a missing database is reported here, not mid-upload.
    graph.run("RETURN 1")
    print("Switched to 'mycrm' database.")
except Exception as e:
    print(f"Could not switch to 'mycrm' database: {e}. Ensure it exists or create it manually in Neo4j.")
    raise

# Clear existing data in the database
graph.delete_all()

# Create nodes and relationships (inspect the labels with: CALL db.labels();)
for i, row in df.iterrows():
    # One Customer node per row, merged on its id.
    customer_node = Node("Customer", id=row['CUSTOMER_ID'], name=row['NAME'], tier=row['TIER'])
    graph.merge(customer_node, "Customer", "id")

    industry_node = Node("Industry", name=row['INDUSTRY'])
    graph.merge(industry_node, "Industry", "name")

    region_node = Node("Region", name=row['REGION'])
    graph.merge(region_node, "Region", "name")

    # Merge on both type and outcome. Merging on type alone made all customers
    # with the same interaction type share one node, whose outcome property
    # was overwritten by whichever row was processed last.
    interaction_node = Node("Interaction", type=row['LAST_INTERACTION_TYPE'], outcome=row['LAST_INTERACTION_OUTCOME'])
    graph.merge(interaction_node, "Interaction", "type", "outcome")

    # Relationships (inspect the types with: CALL db.relationshipTypes();)
    graph.merge(Relationship(customer_node, "WORKS_IN", industry_node))
    graph.merge(Relationship(customer_node, "LOCATED_IN", region_node))
    graph.merge(Relationship(customer_node, "HAD_INTERACTION", interaction_node))

print("Data successfully uploaded to the 'mycrm' Neo4j database!")

CSV file 'crm_data.csv' has been saved.
Connection successful!
Could not switch to 'mycrm' database: Cannot open connection to ConnectionProfile('bolt://127.0.0.1:7474'). Ensure it exists or create it manually in Neo4j.


 142662.   83074.5  72351.  126846.  130507.5  11742.   80496.   66088.5
  65898.  115479.   27361.5  97390.5  85101.   44431.5  70681.5  91795.5
  74991.  100366.5 149349.  134133.   92608.5 148780.5 111553.5  47359.5
 132690.   33298.5  88086.  130699.5  56107.5 148848.  121317.   46699.5
 129733.5 146433.   87984.   27456.   73936.5   2919.   24810.  147835.5
  44053.5  27864.   70312.5  27040.5  14881.5  94224.    2260.5 131154.
  38242.5 111559.5 112476.  114913.5 131395.5  16390.5 117694.5  38391.
 113778.  118069.5 116073.  102924.   40684.5  14019.   40710.   75084.
  43264.5  30940.5  94233.  127651.5  79872.   85197.   67674.  108501.
 114244.5  65421.   20580.   87912.  117363.   78483.  136710.   87261.
  31320.   68158.5  54121.5  79726.5  56736.   89791.5 105679.5 104208.
  23259.  121281.   80119.5 144579.  130689.   91308.   12352.5  49845.
 128359.5 140382.   62466.   39637.5 147973.5  75969.  144517.5  15285.
  58794.  122142.   16414.5  86638.5  84556.5 137715.    59

Data successfully uploaded to the 'mycrm' Neo4j database!


: 

In [None]:
from py2neo import Graph
import pandas as pd

# Open a Bolt connection to the local Neo4j instance.
graph = Graph("bolt://localhost:7687", auth=("neo4j", "onekenoby"))

# Cypher: every node with its internal id, its labels and its `name` property.
nodes_query = """
MATCH (n)
RETURN id(n) as id, labels(n) as labels, n.name as name
"""

# Cypher: every relationship as (start node id, end node id, relationship type).
relationships_query = """
MATCH ()-[r]->()
RETURN id(startNode(r)) as source, id(endNode(r)) as target, type(r) as type
"""

# Run both queries, nodes first, and load each result into a DataFrame.
nodes, relationships = (
    graph.run(query).to_data_frame()
    for query in (nodes_query, relationships_query)
)

In [None]:
import networkx as nx
import plotly.graph_objects as go

# Create a networkx graph (undirected: relationship direction is not kept)
G = nx.Graph()

# Add nodes, keeping the first Neo4j label and the name as node attributes
for index, row in nodes.iterrows():
    node_labels = row['labels']
    # A node may carry no label at all; fall back to a placeholder instead of
    # failing with an IndexError on the empty list.
    primary_label = node_labels[0] if len(node_labels) > 0 else "Unlabeled"
    G.add_node(row['id'], label=primary_label, name=row['name'])

# Add edges, storing the relationship type as the edge label
for index, row in relationships.iterrows():
    G.add_edge(row['source'], row['target'], label=row['type'])

# Get positions for the nodes in 3D (fixed seed, so the layout is reproducible)
pos = nx.spring_layout(G, dim=3, seed=42)

# Extract the coordinates
x_nodes = [pos[k][0] for k in G.nodes()]
y_nodes = [pos[k][1] for k in G.nodes()]
z_nodes = [pos[k][2] for k in G.nodes()]

# Create edge coordinates.
# Each edge contributes its two endpoints followed by None, so all edges can
# be drawn by one trace as separate, unconnected line segments.
x_edges = []
y_edges = []
z_edges = []
for edge in G.edges():
    x_edges += [pos[edge[0]][0], pos[edge[1]][0], None]
    y_edges += [pos[edge[0]][1], pos[edge[1]][1], None]
    z_edges += [pos[edge[0]][2], pos[edge[1]][2], None]

# Create the 3D plot
fig = go.Figure()

# Add edges
fig.add_trace(go.Scatter3d(
    x=x_edges, y=y_edges, z=z_edges,
    mode='lines',
    line=dict(color='black', width=1),
    hoverinfo='none'
))

# Add nodes
fig.add_trace(go.Scatter3d(
    x=x_nodes, y=y_nodes, z=z_nodes,
    mode='markers',
    marker=dict(symbol='circle', size=5, color='blue'),
    # Hover text "<label>: <name>". .get() is used because a node that was
    # created implicitly by add_edge has no attributes and would otherwise
    # raise a KeyError.
    text=[f"{d.get('label', 'Unknown')}: {d.get('name')}" for i, d in G.nodes(data=True)],
    hoverinfo='text'
))

# Set plot layout with an enlarged canvas
fig.update_layout(
    title="3D Graph Visualization",
    showlegend=False,
    scene=dict(
        xaxis=dict(showbackground=False),
        yaxis=dict(showbackground=False),
        zaxis=dict(showbackground=False)
    ),
    width=1200,  # Width of the plot in pixels
    height=1000  # Height of the plot in pixels
)

# Show the plot
fig.show()