# Raster Workflow

In [None]:
import rasterio
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import gaussian_filter
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.metrics import silhouette_score

# Reading Raster Data
# Read band 1 together with the dataset profile and its declared nodata value.
# The dataset is closed as soon as the `with` block exits.
with rasterio.open('data.tif') as src:
    raster_data = src.read(1)
    profile = src.profile
    nodata = src.nodata

# Plotting Raster Data
plt.imshow(raster_data, cmap='viridis')
plt.colorbar()
plt.show()

# Preprocessing Raster Data
# Work in floating point so that missing cells can be represented as NaN
# (integer rasters cannot hold NaN, which would make the fill below a no-op).
if not np.issubdtype(raster_data.dtype, np.floating):
    raster_data = raster_data.astype(np.float64)
# Handling missing values
# Cells equal to the declared nodata value are treated as missing so the
# sentinel (e.g. a large negative number) does not distort the statistics.
if nodata is not None:
    raster_data[raster_data == nodata] = np.nan
raster_data = np.nan_to_num(raster_data, nan=0)

# Statistical Analysis on Raster Data
# Summary statistics
# Computed BEFORE normalization: after z-scoring the mean is ~0 and the
# standard deviation ~1 by construction, so reporting them then says nothing.
mean = np.mean(raster_data)
std_dev = np.std(raster_data)
print(f"Mean: {mean}, Standard Deviation: {std_dev}")

# Normalizing data
# A constant raster has zero spread; dividing by 1 instead leaves it centered
# at 0 rather than filling the array with NaN/inf.
scale = std_dev if std_dev > 0 else 1.0
raster_data = (raster_data - mean) / scale

# Post-Processing Raster Data
# Example: Apply Gaussian filter
raster_data_smoothed = gaussian_filter(raster_data, sigma=1)

# Machine Learning with Raster Data (Example: K-Means Clustering)
# Reshape raster data for clustering: one row per pixel, one feature column.
raster_data_reshaped = raster_data.reshape(-1, 1)
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(raster_data_reshaped)
clustered = kmeans.labels_.reshape(raster_data.shape)

# Classification with Raster Data
# The forest is trained on the K-Means labels and then predicts the same
# pixels it was fitted on. Seeded so repeated runs give the same model.
rf = RandomForestClassifier(random_state=42)
rf.fit(raster_data_reshaped, kmeans.labels_)
classified = rf.predict(raster_data_reshaped).reshape(raster_data.shape)

# Anomaly Detection with Raster Data
# fit_predict returns one label per pixel (per scikit-learn: -1 outlier, 1 inlier).
isolation_forest = IsolationForest(contamination=0.1, random_state=42)
anomalies = isolation_forest.fit_predict(raster_data_reshaped).reshape(raster_data.shape)

# Evaluation
# The silhouette score needs pairwise distances, i.e. O(n^2) work in the
# number of pixels, so it is estimated on a seeded random subset of pixels.
silhouette_sample_size = min(10_000, raster_data_reshaped.shape[0])
silhouette = silhouette_score(
    raster_data_reshaped,
    kmeans.labels_,
    sample_size=silhouette_sample_size,
    random_state=42,
)
print(f"Silhouette Score: {silhouette}")

# Plot Classified Raster Data
plt.imshow(classified, cmap='viridis')
plt.colorbar()
plt.show()


# CSV Workflow

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import IsolationForest
from sklearn.metrics import mean_squared_error, accuracy_score

# Reading CSV Data
data = pd.read_csv('data.csv')

# Plotting CSV Data
data.plot()
plt.show()

# Preprocessing CSV Data
# Handling missing values
# DataFrame.ffill() replaces the deprecated fillna(method='ffill'). The
# trailing bfill() covers leading gaps that have no earlier value to copy.
data = data.ffill().bfill()
# Normalizing data
# Standardize every numeric column except the target. Z-scoring the target
# would turn class labels into continuous floats, which the classifier and
# accuracy_score below reject.
columns_to_scale = data.select_dtypes(include='number').columns.drop('target', errors='ignore')
unscaled = data[columns_to_scale]
# A zero standard deviation (constant column) is replaced by 1 so the column
# becomes all zeros instead of NaN.
data[columns_to_scale] = (unscaled - unscaled.mean()) / unscaled.std().replace(0, 1)

# Post-Processing CSV Data
# Example: Calculate moving average
# The first window-1 rows of the rolling mean are NaN by construction.
data['moving_avg'] = data['column_name'].rolling(window=5).mean()

# Statistical Analysis on CSV Data
# Summary statistics
summary_stats = data.describe()
print(summary_stats)

# Machine Learning with CSV Data (Example: Linear Regression)
# NOTE(review): 'target' is used as both a regression and a classification
# target below, so it is presumably a numeric class label - confirm.
X = data[['feature1', 'feature2']]
y = data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
regression_predictions = model.predict(X_test)

# Clustering CSV Data (Example: K-Means Clustering)
kmeans = KMeans(n_clusters=3, random_state=42)
data['cluster'] = kmeans.fit_predict(X)

# Classification with CSV Data (Example: Decision Tree)
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
classification_predictions = clf.predict(X_test)
# Kept so code that reads `predictions` still sees the classifier output,
# which is what this name held at the end of the original script.
predictions = classification_predictions

# Anomaly Detection with CSV Data (Example: Isolation Forest)
# Seeded so the anomaly labels are reproducible across runs.
isolation_forest = IsolationForest(contamination=0.1, random_state=42)
data['anomaly'] = isolation_forest.fit_predict(X)

# Evaluation
# Each metric is paired with its own model's output: MSE scores the linear
# regression, accuracy scores the decision tree.
mse = mean_squared_error(y_test, regression_predictions)
accuracy = accuracy_score(y_test, classification_predictions)
print(f"Mean Squared Error: {mse}, Accuracy: {accuracy}")

# Plot Clusters (CSV Data)
plt.scatter(data['feature1'], data['feature2'], c=data['cluster'], cmap='viridis')
plt.colorbar()
plt.show()
