Load the data from a CSV file into a Pandas DataFrame.

In [None]:
import pandas as pd

# Read the whole CSV into memory; every parser option is left at its default.
df = pd.read_csv(filepath_or_buffer='data.csv')

View the shape of the data (rows, columns).

In [None]:
# DataFrame.shape is a (row_count, column_count) tuple.
print((df.shape[0], df.shape[1]))

Get a concise summary of the DataFrame, including non-null counts.

In [None]:
# DataFrame.info() writes its summary straight to stdout and returns None,
# so wrapping it in print() would add a stray "None" line to the output.
df.info()

Remove rows with missing values.

In [None]:
# Drop, in place, every row (axis=0) that has at least one missing value.
df.dropna(axis=0, how='any', inplace=True)

View the shape of the data after removing nulls.

In [None]:
# Same (rows, columns) tuple as df.shape, built from its two components.
print((len(df.index), len(df.columns)))

Display the first few rows of the DataFrame.

In [None]:
# Preview the first five rows (the default that head() uses).
print(df.head(n=5))

Drop the `Unnamed: 0` column from the DataFrame.

In [None]:
# Remove the 'Unnamed: 0' column in place; raises KeyError if it is absent.
df.drop('Unnamed: 0', axis='columns', inplace=True)

Display the updated DataFrame after dropping the unnamed column.

In [None]:
# Positional slice of the first five rows, i.e. what head() returns by default.
print(df.iloc[:5])

Create a correlation heatmap to visualize relationships between the numeric variables.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Pearson correlation is only defined for numeric columns. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises on
# string columns such as 'language' or 'text', so restrict it explicitly.
# (numeric_only is available for DataFrame.corr from pandas 1.5 onwards.)
sns.heatmap(df.corr(numeric_only=True), annot=True)
plt.show()

Create a barplot to visualize the count of each programming language.

In [None]:
# One bar per 'language' value; bar height is taken from the 'count' column.
sns.barplot(data=df, x='language', y='count')
plt.show()

Group the data by language and calculate the average of each numeric column for each group.

In [None]:
# Average every numeric column within each language group. numeric_only=True
# is required on pandas >= 2.0, where mean() raises TypeError if the frame
# still holds non-numeric columns (e.g. the free-text 'text' column).
grouped = df.groupby('language').mean(numeric_only=True)

Filter the DataFrame for entries with a vote average greater than 3.

In [None]:
# Keep only the rows whose 'vote_average' is strictly greater than 3.
filtered = df.loc[df['vote_average'].gt(3)]

Get an overview of the data with descriptive statistics.

In [None]:
# Summary statistics using describe()'s default percentiles and column selection.
example_overview = df.describe(percentiles=None, include=None, exclude=None)

Process text data by converting it into a matrix of token counts.

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words model with scikit-learn's default tokenisation settings.
vectorizer = CountVectorizer()
# Learn the vocabulary from 'text' and build the document-term count matrix.
X = vectorizer.fit_transform(raw_documents=df['text'])

Remove stopwords from the text column.

In [None]:
from nltk.corpus import stopwords

# NOTE: stopwords.words() raises LookupError until the corpus has been fetched
# once with nltk.download('stopwords').
stop_words = set(stopwords.words('english'))


def _strip_stopwords(text):
    """Return *text* with English stopwords removed, keeping word order.

    The NLTK stopword list is all lowercase, so each token is lowercased for
    the membership test only; otherwise capitalised stopwords such as "The"
    or "And" would survive. Kept tokens retain their original casing.
    """
    return ' '.join(word for word in text.split() if word.lower() not in stop_words)


df['processed_text'] = df['text'].apply(_strip_stopwords)

Stem the words in the processed text.

In [None]:
from nltk.stem import PorterStemmer

# A single Porter stemmer instance is reused for every row.
stemmer = PorterStemmer()
# Split each document on whitespace, stem every token, and re-join with spaces.
df['stemmed_text'] = df['processed_text'].apply(
    lambda document: ' '.join(map(stemmer.stem, document.split()))
)