In [None]:
import pandas as pd

# Read the equipment readings from the CSV file in the working directory.
csv_path = 'equipment_anomaly_data.csv'
df = pd.read_csv(csv_path)

# Print the first rows as a quick check that the load worked.
preview = df.head()
print(preview)

In [None]:
# Print a structural summary of the frame (columns, non-null counts, dtypes).
df.info()

In [None]:
# Display summary statistics for the frame's columns.
df.describe()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's white-grid look to the figures drawn from here on.
sns.set(style="whitegrid")

# Draw the per-column histograms with 15 bins on a 15x10 inch canvas.
hist_options = {"bins": 15, "figsize": (15, 10)}
df.hist(**hist_options)

# Tidy the spacing between panels before rendering.
plt.tight_layout()
plt.show()


In [None]:
# Integer code assigned to each site. The codes are arbitrary labels, not an
# ordering. Any city that is not listed here is mapped to NaN by Series.map.
LOCATION_CODES = {'Atlanta': 1, 'Chicago': 2, 'San Francisco': 3, 'New York': 4, 'Houston': 5}

# Encode only while the column still holds city names. Re-running this cell on
# the already-encoded integers would otherwise look them up in a dict keyed by
# strings and silently turn the whole column into NaN.
if not pd.api.types.is_numeric_dtype(df['location']):
    df['location'] = df['location'].map(LOCATION_CODES)

# Show a preview instead of the whole frame.
df.head()

In [None]:
# Integer code assigned to each equipment type. The codes are arbitrary labels,
# not an ordering. Any type that is not listed here is mapped to NaN by Series.map.
EQUIPMENT_CODES = {'Turbine': 1, 'Compressor': 2, 'Pump': 3}

# Encode only while the column still holds type names. Re-running this cell on
# the already-encoded integers would otherwise look them up in a dict keyed by
# strings and silently turn the whole column into NaN.
if not pd.api.types.is_numeric_dtype(df['equipment']):
    df['equipment'] = df['equipment'].map(EQUIPMENT_CODES)

# Show a preview instead of the whole frame.
df.head()

In [None]:
# Pairwise correlation between the DataFrame's columns.
corr = df.corr()

# Render the matrix as an annotated heatmap on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', linewidths=0.5, ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# 'faulty' is taken as the label (assumed to be the target variable);
# every other column is used as a feature.
y = df['faulty']
X = df.drop(columns='faulty')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest settings: 100 trees, fixed seed for repeatable training.
rf_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**rf_params)

# Fit on the scaled training features; the fitted estimator is the cell output.
model.fit(X_train_scaled, y_train)


In [None]:
# Predict the label for every held-out row using the scaled test features,
# then echo the resulting array as the cell output.
y_pred = model.predict(X_test_scaled)
y_pred

In [None]:
# Importance scores from the fitted forest, paired with the feature column names.
importances = model.feature_importances_
features = X.columns

# Build the (feature, importance) table and order it from most to least important.
feature_importance_df = (
    pd.DataFrame({'Feature': features, 'Importance': importances})
    .sort_values(by='Importance', ascending=False)
)

# Horizontal bar chart: one bar per feature, in the sorted order.
plt.figure(figsize=(10, 6))
sns.barplot(data=feature_importance_df, x='Importance', y='Feature')
plt.title('Feature Importance')
plt.show()

In [None]:
from rdflib import Graph, Literal, RDF, URIRef, Namespace
from rdflib.namespace import XSD, RDFS, OWL, FOAF

In [None]:
# Empty RDF graph that the following cells populate and query.
g = Graph()

# Vocabulary namespaces used when minting triples.
SOSA = Namespace("http://www.w3.org/ns/sosa/")
SSN = Namespace("http://www.w3.org/ns/ssn/")
SAREF = Namespace("https://w3id.org/saref#")
EX = Namespace("http://example.org/industrial#")

# Register short prefixes so serialized output is readable.
for prefix, namespace in (
    ("sosa", SOSA),
    ("ssn", SSN),
    ("saref", SAREF),
    ("ex", EX),
):
    g.bind(prefix, namespace)

In [None]:

def _uri_slug(value):
    """Return ``value`` as text usable as a URI local name.

    ``df.iterrows()`` yields each row as a single Series, so integer category
    codes in an otherwise float frame arrive as floats such as ``1.0``; those
    are rendered as ``1``. Spaces in string labels become underscores.
    """
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip().replace(" ", "_")


# Observable property local name -> DataFrame column holding its measurement.
PROPERTY_COLUMNS = {
    'Temperature': 'temperature',
    'Pressure': 'pressure',
    'Vibration': 'vibration',
    'Humidity': 'humidity',
}

# The property declarations do not depend on the row, so add them once.
for prop_name in PROPERTY_COLUMNS:
    g.add((EX[prop_name], RDF.type, SOSA.ObservableProperty))

for idx, row in df.iterrows():
    # Namespace lookups need string keys. Indexing EX with the raw numeric code
    # does not produce a distinct URI per device/location, so build explicit
    # string local names instead.
    device_uri = EX[f"Equipment_{_uri_slug(row['equipment'])}"]
    loc_uri = EX[f"Location_{_uri_slug(row['location'])}"]

    g.add((device_uri, RDF.type, SAREF.Device))
    g.add((loc_uri, RDF.type, SOSA.FeatureOfInterest))

    # One observation node per (row, property). Hanging all four results off a
    # single node makes it impossible to tell which value belongs to which
    # property, so a query for ex:Temperature would also return the pressure,
    # vibration and humidity readings.
    for prop_name, column in PROPERTY_COLUMNS.items():
        obs_uri = EX[f"Observation{idx+1}_{prop_name}"]
        g.add((obs_uri, RDF.type, SOSA.Observation))
        g.add((obs_uri, SOSA.madeBySensor, device_uri))
        g.add((obs_uri, SOSA.hasFeatureOfInterest, loc_uri))
        g.add((obs_uri, SOSA.observedProperty, EX[prop_name]))
        # float() turns the NumPy scalar into a plain Python float for the literal.
        g.add((obs_uri, SOSA.hasSimpleResult, Literal(float(row[column]), datatype=XSD.float)))

    # bool() gives a well-formed "true"/"false" xsd:boolean; passing the raw
    # float would produce the ill-typed literal "1.0"^^xsd:boolean.
    # NOTE(review): 'faulty' is recorded per reading but attached to the shared
    # device node, so a device can end up with both true and false values.
    # Confirm whether the flag should live on the observation instead.
    g.add((device_uri, SAREF.hasFault, Literal(bool(row['faulty']), datatype=XSD.boolean)))


In [None]:
# Persist the graph to disk in Turtle syntax.
turtle_path = "industrial_data.ttl"
g.serialize(destination=turtle_path, format="turtle")

# Echo the same Turtle serialization to the cell output.
turtle_text = g.serialize(format="turtle")
print(turtle_text)


In [None]:
from rdflib.plugins.sparql import prepareQuery

# SPARQL text: every saref:Device together with a saref:hasFault value equal to true.
faulty_devices_sparql = """
    PREFIX saref: <https://w3id.org/saref#>
    PREFIX ex: <http://example.org/industrial#>

    SELECT ?device ?fault
    WHERE {
        ?device a saref:Device ;
                saref:hasFault ?fault .
        FILTER(?fault = true)
    }
"""
q = prepareQuery(faulty_devices_sparql)

# Execute against the graph and print one line per matching device.
fault_results = g.query(q)
for row in fault_results:
    print(f"Device: {row.device}, Fault: {row.fault}")


In [None]:
# SPARQL text: observations of ex:Temperature with their simple result,
# highest value first, limited to five rows.
top_temperature_sparql = """
    PREFIX sosa: <http://www.w3.org/ns/sosa/>
    PREFIX ex: <http://example.org/industrial#>

    SELECT ?obs ?value
    WHERE {
        ?obs a sosa:Observation ;
             sosa:observedProperty ex:Temperature ;
             sosa:hasSimpleResult ?value .
    }
    ORDER BY DESC(?value)
    LIMIT 5
"""
q2 = prepareQuery(top_temperature_sparql)

# Execute against the graph and print one line per returned observation.
temperature_results = g.query(q2)
for row in temperature_results:
    print(f"Observation: {row.obs}, Temperature: {row.value}")
