Crop Disease & Yield Prediction System

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# --- Reproducibility ---
# Imported here (not in the shared import cell) because only this section needs them.
from keras.layers import Input
from keras.utils import set_random_seed

# Seeds Python's `random`, NumPy and the Keras backend so the synthetic data
# and the CNN weight initialisation are the same on every run.
set_random_seed(42)

# --- CNN: Crop Disease Detection ---
# Synthetic placeholder data: 100 random 64x64 RGB images with random 0/1 labels.
X_img, y_img = np.random.rand(100,64,64,3), np.random.randint(2,size=100)
cnn = Sequential([
    # Declaring the shape with an explicit Input layer (instead of passing
    # `input_shape=` to Conv2D) avoids the Keras UserWarning this cell emitted.
    Input(shape=(64,64,3)),
    Conv2D(32,(3,3),activation='relu'),
    MaxPooling2D(2,2), Flatten(),
    Dense(128,activation='relu'),
    Dense(1,activation='sigmoid')  # single sigmoid unit: score in [0, 1] for label 1
])
cnn.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
cnn.fit(X_img,y_img,epochs=3,batch_size=10,verbose=0)

# --- Random Forest: Yield Prediction ---
# Synthetic placeholder data: 3 random features per sample, target in [0, 100).
X, y = np.random.rand(100,3), np.random.rand(100)*100
# NOTE: the held-out split (Xte, yte) is created but not evaluated in this cell.
Xtr, Xte, ytr, yte = train_test_split(X,y,test_size=0.2,random_state=42)
rf = RandomForestRegressor(n_estimators=100,random_state=42).fit(Xtr,ytr)

# --- Recommendation ---
def recommend(d_pred,y_pred):
    """Map the two model outputs to one advisory message.

    Args:
        d_pred: disease score; values of 0.5 or more count as diseased.
        y_pred: predicted yield; values below 50 count as low.

    Returns:
        The disease message when the disease score reaches 0.5, otherwise
        the low-yield message when the yield is under 50, otherwise the
        all-clear message.
    """
    # Disease takes precedence: the yield is only inspected for healthy crops.
    if not d_pred >= 0.5:
        if y_pred < 50:
            return "Low yield! Improve irrigation."
        return "Crop healthy, yield optimal."
    return "Disease detected! Apply pesticide."

# --- Test with Simulated Inputs ---
# A batch holding one random 64x64 RGB image, and one hand-picked feature row.
test_img = np.random.rand(1,64,64,3)
sample_features = [[0.8,0.6,0.7]]

# Both models return one prediction per input row; take the single value.
d_pred = cnn.predict(test_img,verbose=0)[0][0]
y_pred = rf.predict(sample_features)[0]

print(f"Disease Prediction: {d_pred:.4f} (0:Healthy,1:Diseased)")
print(f"Yield Prediction: {y_pred:.2f} units")
advice = recommend(d_pred,y_pred)
print("Recommendation:", advice)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Disease Prediction: 0.4347 (0:Healthy,1:Diseased)
Yield Prediction: 49.63 units
Recommendation: Low yield! Improve irrigation.


Content-Based Movie Recommendation System

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Data
movies = pd.DataFrame({
 'title':['The Shawshank Redemption','The Godfather','The Dark Knight','Pulp Fiction','The Lord of the Rings: The Return of the King'],
 'genres':['Drama','Crime, Drama','Action, Crime, Drama','Crime, Drama','Action, Adventure, Fantasy'],
 'desc':['Two imprisoned men bond over years.','Patriarch transfers crime dynasty.','Joker wreaks havoc on Gotham.','Mob hitmen tales of violence.','Gandalf and Aragorn fight Sauron.']
})
# Text used for similarity: genres followed by the short description.
movies['content']=movies['genres']+' '+movies['desc']

# TF-IDF + Similarity
tfidf=TfidfVectorizer(stop_words='english')
# Fit/transform once and reuse the matrix for both sides of the kernel;
# the original refitted the vectoriser a second time on the same text.
tfidf_matrix=tfidf.fit_transform(movies['content'])
# sim[a][b] is the dot product of the TF-IDF rows of movies a and b.
sim=linear_kernel(tfidf_matrix,tfidf_matrix)

def recommend(title):
    """Return the titles of the three movies most similar to `title`.

    Reads the module-level `movies` frame and `sim` matrix; row i of `sim`
    is assumed to describe row i of `movies` (default integer index).

    Args:
        title: exact title to look up in ``movies['title']``.

    Returns:
        A pandas Series of up to three titles, most similar first, indexed
        by their row labels in `movies`.

    Raises:
        IndexError: if `title` is not present in `movies` (same exception
            type as before, now with an explanatory message).
    """
    hits = movies.index[movies['title'] == title]
    if len(hits) == 0:
        raise IndexError(f"Title {title!r} not found in movies")
    i = hits[0]
    # Drop the query movie by index rather than assuming it sorts first:
    # another movie with an identical score could otherwise take its place
    # and the query itself would be recommended.
    others = [(j, score) for j, score in enumerate(sim[i]) if j != i]
    top = sorted(others, key=lambda x: x[1], reverse=True)[:3]
    return movies['title'][[j for j, _ in top]]
# Output
# Look up the neighbours first, then print them under a fixed header.
godfather_recs = recommend('The Godfather')
print("Recommendations for 'The Godfather':\n", godfather_recs)


Recommendations for 'The Godfather':
 3                Pulp Fiction
2             The Dark Knight
0    The Shawshank Redemption
Name: title, dtype: object


Hybrid Movie Recommendation System

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, pairwise_distances
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

# Data
movies = pd.DataFrame({
 'title':['The Shawshank Redemption','The Godfather','The Dark Knight','Pulp Fiction','The Lord of the Rings: The Return of the King'],
 'genres':['Drama','Crime, Drama','Action, Crime, Drama','Crime, Drama','Action, Adventure, Fantasy'],
 'desc':['Two imprisoned men bond.','Patriarch transfers crime dynasty.','Joker wreaks havoc.','Mob hitmen tales.','Gandalf vs Sauron.']
})
# Text used for content similarity: genres followed by the short description.
movies['content']=movies['genres']+' '+movies['desc']

ratings=pd.DataFrame({
 'user_id':[1,1,1,2,2,3,3],
 'title':['The Shawshank Redemption','The Godfather','The Dark Knight','The Dark Knight','Pulp Fiction','The Shawshank Redemption','Pulp Fiction'],
 'rating':[5,4,5,4,5,5,4]
})

# Models
tfidf = TfidfVectorizer(stop_words='english')
# With a single argument linear_kernel compares the matrix with itself.
cos_sim = linear_kernel(tfidf.fit_transform(movies['content']))
# User-item matrix: one row per user_id (pivot sorts them ascending), one
# column per rated title, 0 where the user gave no rating.
user_item = ratings.pivot(index='user_id', columns='title', values='rating').fillna(0)
mat = csr_matrix(user_item.values)
# Fixed random_state: without it the SVD factors, and therefore the
# neighbour ranking built on them, can differ from run to run.
latent = TruncatedSVD(n_components=2, random_state=42).fit_transform(mat)

# Functions
def content_rec(title):
    """Return the three titles whose content is most similar to `title`.

    Reads the module-level `movies` frame and `cos_sim` matrix; row i of
    `cos_sim` is assumed to describe row i of `movies` (default integer index).

    Args:
        title: exact title to look up in ``movies['title']``.

    Returns:
        A list of up to three titles, most similar first.

    Raises:
        IndexError: if `title` is not present in `movies` (same exception
            type as before, now with an explanatory message).
    """
    hits = movies.index[movies['title'] == title]
    if len(hits) == 0:
        raise IndexError(f"Title {title!r} not found in movies")
    idx = hits[0]
    # Exclude the query row explicitly; slicing off the first sorted entry
    # returns the query itself whenever another movie ties with it.
    others = [(j, score) for j, score in enumerate(cos_sim[idx]) if j != idx]
    top = sorted(others, key=lambda x: x[1], reverse=True)[:3]
    return [movies['title'][j] for j, _ in top]

def collab_rec(uid):
    """Collect the titles rated by the users closest to `uid` in latent space.

    Reads the module-level `ratings` frame and `latent` matrix. The rows of
    `latent` are assumed to follow ascending user_id order, which is what
    ``ratings.pivot(index='user_id', ...)`` produces.

    Args:
        uid: a user id present in ``ratings['user_id']``.

    Returns:
        A list (unordered, no duplicates) of titles rated by the other users
        among the three rows nearest to `uid` by cosine distance.

    Raises:
        ValueError: if `uid` does not occur in ``ratings['user_id']``.
    """
    # Sorted ids line up with the pivot's row order; `unique()` alone keeps
    # first-appearance order and can point at the wrong latent row.
    user_ids = sorted(ratings['user_id'].unique())
    i = user_ids.index(uid)
    nearest = pairwise_distances(latent[i].reshape(1,-1), latent, metric='cosine')[0].argsort()[:3]
    rec = []
    for j in nearest:
        if j == i:
            # The user's own row is at distance 0; their own ratings are
            # not recommendations.
            continue
        similar_user_id = user_ids[j]  # actual user id, not the row position
        rec.extend(ratings.loc[ratings['user_id'] == similar_user_id, 'title'])
    # Return only after every neighbour has been visited (the previous
    # version returned from inside the loop, after the first row).
    return list(set(rec))

def hybrid(uid, title):
    """Merge content-based and collaborative suggestions without duplicates.

    Args:
        uid: user id forwarded to `collab_rec`.
        title: movie title forwarded to `content_rec`.

    Returns:
        A list holding each suggested title once; the order is not defined.
    """
    by_content = content_rec(title)
    by_users = collab_rec(uid)
    combined = by_content + by_users
    return list(set(combined))

# Output
# Example query: user 1 combined with 'The Godfather'.
hybrid_recs = hybrid(1, 'The Godfather')
print("Hybrid Recommendations:", hybrid_recs)


Hybrid Recommendations: ['The Shawshank Redemption', 'Pulp Fiction', 'The Dark Knight', 'The Godfather']


Breast Cancer Prediction Code

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Data
# Feature matrix and labels, split 80/20 with a fixed seed.
X, y = load_breast_cancer(return_X_y=True)
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.2, random_state=42)

# Model
# 100-tree random forest, fitted on the training part only.
m = RandomForestClassifier(n_estimators=100, random_state=42)
m.fit(Xtr, ytr)
yp = m.predict(Xte)

# Results
# All three metrics compare the held-out labels with the predictions.
print("Accuracy:", accuracy_score(yte, yp))
print("\nReport:\n", classification_report(yte, yp))
print("Confusion Matrix:\n", confusion_matrix(yte, yp))

# Recommendation
def advise(f):
    """Return a short recommendation for one sample.

    Args:
        f: one feature row; it is wrapped in a list and passed to the
            module-level classifier `m`.

    Returns:
        The malignant warning when the predicted label is 0, otherwise the
        benign message.
    """
    predicted_label = m.predict([f])[0]
    if predicted_label == 0:
        return "⚠️ Malignant! Consult doctor."
    return "✅ Benign. Routine checkup."

# Advice for the first held-out sample.
first_test_sample = Xte[0]
print("\nRecommendation:", advise(first_test_sample))


Accuracy: 0.9649122807017544

Report:
               precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

Confusion Matrix:
 [[40  3]
 [ 1 70]]

Recommendation: ✅ Benign. Routine checkup.


In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

# === 1. Load and preprocess ===
# One seeded generator drives every random choice below, so a re-run
# produces the same synthetic users and the same recommendations.
rng = np.random.default_rng(42)

df = pd.read_csv('/content/RS-A5_amazon_products_sales_data_cleaned.csv')
df['product_rating'] = df['product_rating'].fillna(df['product_rating'].mean())
# Cap the sample size so a file with fewer than 5000 rows does not raise.
df = df.sample(min(5000, len(df)), random_state=42).reset_index(drop=True)

# Add dummy user and product IDs
df['user_id'] = rng.integers(0, 500, len(df))  # 500 users for better overlap
df['product_id'] = df.index  # row label doubles as the product id

# === 2. Create user-item matrix ===
# Rows: users, columns: products, 0 where a user has no rating.
pivot = df.pivot_table(index='user_id', columns='product_id', values='product_rating').fillna(0)

# === 3. Apply SVD ===
# n_components may not exceed the number of product columns.
svd = TruncatedSVD(n_components=min(50, pivot.shape[1]), random_state=42)
latent_matrix = svd.fit_transform(pivot)
# Low-rank approximation of the user-item matrix, used as predicted ratings.
reconstructed = np.dot(latent_matrix, svd.components_)

# === 4. Recommend top 5 items for a random user ===
user_id = rng.choice(pivot.index.to_numpy())
user_idx = pivot.index.get_loc(user_id)

# Predicted score of every product for this user (aligned with pivot.columns)
pred_ratings = reconstructed[user_idx]

# Items already rated by the user (stored rating above 0)
already_rated = (pivot.iloc[user_idx] > 0).to_numpy()

# Exclude already-rated items from recommendations
candidates = [
    (pid, score)
    for pid, score, rated in zip(pivot.columns, pred_ratings, already_rated)
    if not rated
]

# Sort predicted ratings, best first
top5 = sorted(candidates, key=lambda x: x[1], reverse=True)[:5]

# === 5. Display results ===
print(f"\nTop 5 Recommendations for user ID {user_id} :\n")
for i,_ in top5:
    # product_id equals the row label of df, so it can be used with .loc
    print(i, "--->", df.loc[i, 'product_title'])

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

df1 = pd.read_csv('/content/RS-A2_A3_movie.csv')
df2 = pd.read_csv('/content/RS-A2_A3_tag.csv')

# Inner join: only movies that have at least one tag row survive.
movies = df1.merge(df2, on='movieId').fillna('')
# Text used for similarity: genres followed by the tag.
movies['context'] = movies['genres'] + " " + movies['tag']

# IMPORTANT: reset index after sampling so row labels match matrix rows.
# The sample is seeded (same rows on every run) and capped at the number of
# available rows so smaller files do not raise.
movies = movies.sample(min(10000, len(movies)), random_state=42).reset_index(drop=True)
print(movies.head())


tfidf = TfidfVectorizer(stop_words='english')
mat = tfidf.fit_transform(movies['context'])
# Row-by-row similarity of every sampled row with every other sampled row.
sim = linear_kernel(mat, mat)

def reco(id):
    """Print the titles of the three movies most similar to movie `id`.

    Reads the module-level `movies` frame and `sim` matrix; row i of `sim`
    is assumed to describe the i-th row of `movies`.

    Args:
        id: a movieId present in ``movies['movieId']``. (The name shadows
            the builtin but is kept so keyword callers keep working.)

    Returns:
        None; up to three titles are printed, most similar first.

    Raises:
        IndexError: if `id` is not in the sampled movies (same exception
            type as before, now with an explanatory message).
    """
    positions = (movies['movieId'] == id).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"movieId {id!r} is not in the sampled movies")
    # First row of the query movie within the sampled dataset
    i = positions[0]

    movie_ids = movies['movieId'].to_numpy()
    titles = movies['title'].to_numpy()
    ranked = sorted(enumerate(sim[i]), key=lambda x: x[1], reverse=True)

    # The same movieId can occupy several rows (one per merged tag), so
    # skipping only the first sorted entry could print the query movie
    # itself. Track movieIds instead: skip the query and print each other
    # movie at most once.
    seen = {id}
    shown = 0
    for j, _ in ranked:
        if shown == 3:
            break
        if movie_ids[j] in seen:
            continue
        seen.add(movie_ids[j])
        print(titles[j])
        shown += 1


=

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, pairwise_distances
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

# ✅ Load datasets
df1 = pd.read_csv('/content/RS-A2_A3_movie.csv')
df2 = pd.read_csv('/content/RS-A2_A3_tag.csv')
df3 = pd.read_csv('/content/RS-A2_A3_Filtered_Ratings.csv')

# ✅ Merge (movie + tags); movies without tags are kept with an empty tag
movies = df1.merge(df2, on='movieId', how='left').fillna('')

# ✅ SAMPLE **10,000 rows** (seeded for repeatable runs, capped at the
# number of available rows so smaller files do not raise)
movies = movies.sample(min(10000, len(movies)), random_state=42).reset_index(drop=True)

# ✅ Create content column
movies['content'] = movies['genres'] + ' ' + movies['tag']

# ✅ Content-based model
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['content'])

# ✅ Similarity of every sampled row with every other sampled row
cos_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# ✅ Merge ratings with movie titles (using sampled movie list)
# Join against one row per movieId: if the tag file holds several rows for
# a movie, joining on the repeated rows would copy every rating once per tag.
movie_titles = movies[['movieId', 'title']].drop_duplicates('movieId')
ratings = df3.merge(movie_titles, on='movieId', how='inner')

# ✅ User-item matrix for collaborative filtering (0 = not rated)
pivot = ratings.pivot_table(
    index='userId',
    columns='title',
    values='rating'
).fillna(0)

mat = csr_matrix(pivot.values)

# ✅ SVD (use 20 components for 10k data); row order follows pivot.index
latent = TruncatedSVD(n_components=20, random_state=42).fit_transform(mat)

# -------------------------------------------------------
# ✅ FUNCTIONS
# -------------------------------------------------------

def content_rec(title, top_n=5):
    """Content-based recommendation.

    Reads the module-level `movies` frame and `cos_sim` matrix; row i of
    `cos_sim` is assumed to describe the i-th row of `movies`.

    Args:
        title: exact title to look up in ``movies['title']``.
        top_n: maximum number of titles to return.

    Returns:
        Up to `top_n` distinct titles, most similar first, never including
        `title` itself. If the title is unknown, a one-element list holding
        an explanatory message.
    """
    positions = (movies['title'] == title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        return ["Title not in 10k sampled movies"]

    idx = positions[0]
    titles = movies['title'].to_numpy()
    ranked = sorted(enumerate(cos_sim[idx]), key=lambda x: x[1], reverse=True)

    # A title can occupy several rows (one per merged tag). Dropping only the
    # first sorted entry could therefore return the query title itself or the
    # same title twice, so filter by title instead of by position.
    seen = {title}
    recs = []
    for j, _ in ranked:
        if len(recs) >= top_n:
            break
        candidate = titles[j]
        if candidate in seen:
            continue
        seen.add(candidate)
        recs.append(candidate)
    return recs


def collab_rec(uid, top_n=5):
    """Collaborative filtering recommendation.

    Reads the module-level `pivot`, `latent` and `ratings`; row i of `latent`
    is assumed to belong to user ``pivot.index[i]``.

    Args:
        uid: user id to look up in ``pivot.index``.
        top_n: number of nearest users whose rated titles are collected.

    Returns:
        A list (unordered, no duplicates) of titles rated by the `top_n`
        users closest to `uid` by cosine distance. If the user is unknown,
        a one-element list holding an explanatory message.
    """
    if uid not in pivot.index:
        return ["User ID not found in rating dataset"]

    user_idx = pivot.index.get_loc(uid)

    distances = pairwise_distances(
        latent[user_idx].reshape(1, -1),
        latent,
        metric='cosine'
    )[0]

    # Remove the user by position instead of dropping the first sorted entry:
    # another user at distance 0 can sort ahead of them, in which case the
    # slice kept the user themself as a "neighbour".
    order = distances.argsort(kind='stable')
    similar_users = order[order != user_idx][:top_n]

    rec_movies = set()
    for su in similar_users:
        real_uid = pivot.index[su]  # actual user id, not the row position
        rec_movies.update(ratings.loc[ratings['userId'] == real_uid, 'title'])

    return list(rec_movies)


def hybrid(uid, title):
    """Union of the content-based and collaborative suggestions.

    Args:
        uid: user id forwarded to `collab_rec`.
        title: movie title forwarded to `content_rec`.

    Returns:
        A list holding each suggestion once; the order is not defined.
    """
    content_part = content_rec(title)
    collab_part = collab_rec(uid)
    merged = content_part + collab_part
    return list(set(merged))


# ✅ SAMPLE OUTPUT
# Query: user 1 together with the title stored in the first sampled row.
first_title = movies.loc[0, 'title']
print("✅ Hybrid Recommendations:", hybrid(1, first_title))


In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, pairwise_distances
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

# ✅ Load datasets
df1 = pd.read_csv('/content/RS-A2_A3_movie.csv')
df2 = pd.read_csv('/content/RS-A2_A3_tag.csv')
df3 = pd.read_csv('/content/RS-A2_A3_Filtered_Ratings.csv')

# ✅ Merge movie + tags; movies without tags are kept with an empty tag
movies = df1.merge(df2, on='movieId', how='left').fillna('')

# ✅ Sample 10k movies (seeded for repeatable runs, capped at the number of
# available rows so smaller files do not raise)
movies = movies.sample(min(10000, len(movies)), random_state=42).reset_index(drop=True)

# ✅ Prepare content text
movies['content'] = movies['genres'] + ' ' + movies['tag']

# ✅ Content-based model
tfidf = TfidfVectorizer(stop_words='english')
# With a single argument linear_kernel compares the matrix with itself.
cos_sim = linear_kernel(tfidf.fit_transform(movies['content']))

# ✅ Merge ratings only for sampled movies
# Join against one row per movieId: if the tag file holds several rows for
# a movie, joining on the repeated rows would copy every rating once per tag.
movie_titles = movies[['movieId','title']].drop_duplicates('movieId')
ratings = df3.merge(movie_titles, on='movieId', how='inner')

# ✅ Collaborative model pivot (rows: users, columns: movieIds, 0 = not rated)
pivot = ratings.pivot_table(index='userId', columns='movieId', values='rating').fillna(0)
# Row order of `latent` follows pivot.index
latent = TruncatedSVD(n_components=20, random_state=42).fit_transform(csr_matrix(pivot.values))



# ✅ ✅ ✅ FUNCTIONS USING movieId
# -------------------------------------------------------

def content_rec(movie_id, top_n=5):
    """Content-based recommendation using movieId.

    Reads the module-level `movies` frame and `cos_sim` matrix; row i of
    `cos_sim` is assumed to describe the i-th row of `movies`.

    Args:
        movie_id: a movieId to look up in ``movies['movieId']``.
        top_n: maximum number of movieIds to return.

    Returns:
        Up to `top_n` distinct movieIds, most similar first, never including
        `movie_id` itself. If the id is unknown, a one-element list holding
        an explanatory message.
    """
    positions = (movies['movieId'] == movie_id).to_numpy().nonzero()[0]
    if len(positions) == 0:
        return ["Movie ID not in sampled 10k dataset"]

    idx = positions[0]
    movie_ids = movies['movieId'].to_numpy()
    ranked = sorted(enumerate(cos_sim[idx]), key=lambda x: x[1], reverse=True)

    # A movieId can occupy several rows (one per merged tag). Dropping only
    # the first sorted entry could therefore return the query movie itself or
    # the same movie twice, so filter by movieId instead of by position.
    seen = {movie_id}
    rec_ids = []
    for j, _ in ranked:
        if len(rec_ids) >= top_n:
            break
        candidate = movie_ids[j]
        if candidate in seen:
            continue
        seen.add(candidate)
        rec_ids.append(candidate)
    return rec_ids



def collab_rec(uid, top_n=5):
    """Collaborative filtering recommendation using userId.

    Reads the module-level `pivot`, `latent` and `ratings`; row i of `latent`
    is assumed to belong to user ``pivot.index[i]``.

    Args:
        uid: user id to look up in ``pivot.index``.
        top_n: number of nearest users whose rated movies are collected.

    Returns:
        A list (unordered, no duplicates) of movieIds rated by the `top_n`
        users closest to `uid` by cosine distance. If the user is unknown,
        a one-element list holding an explanatory message.
    """
    if uid not in pivot.index:
        return ["User ID not found"]

    user_idx = pivot.index.get_loc(uid)

    distances = pairwise_distances(
        latent[user_idx].reshape(1, -1),
        latent,
        metric='cosine'
    )[0]

    # Remove the user by position instead of dropping the first sorted entry:
    # another user at distance 0 can sort ahead of them, in which case the
    # slice kept the user themself as a "neighbour".
    order = distances.argsort(kind='stable')
    similar_users = order[order != user_idx][:top_n]

    rec_ids = set()
    for u in similar_users:
        real_uid = pivot.index[u]  # actual user id, not the row position
        rec_ids.update(ratings.loc[ratings['userId'] == real_uid, 'movieId'])

    return list(rec_ids)



def hybrid(uid, movie_id):
    """Union of the content-based and collaborative suggestions.

    Args:
        uid: user id forwarded to `collab_rec`.
        movie_id: movieId forwarded to `content_rec`.

    Returns:
        A list holding each suggestion once; the order is not defined.
    """
    from_content = content_rec(movie_id)
    from_users = collab_rec(uid)
    pooled = from_content + from_users
    return list(set(pooled))



# ✅ SAMPLE TEST (using movieId)
# Query: user 1 together with the movieId stored in the first sampled row.
sample_movie_id = movies.loc[0, 'movieId']
print("Movie ID:", sample_movie_id)
sample_recs = hybrid(1, sample_movie_id)
print("✅ Hybrid Recommendations:", sample_recs)
