In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Enumerate every file found under the Kaggle input directory and print its full path.
input_files = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_file in input_files:
    print(input_file)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Path of the BBC articles CSV inside the Kaggle input directory.
BBC_CSV_PATH = '../input/bbc-fulltext-and-category/bbc-text.csv'
data = pd.read_csv(BBC_CSV_PATH)

In [None]:
# Peek at the first five rows of the freshly loaded frame.
data.head(n=5)

# Topic Classification

We will perform topic classification in two ways: first with a bag-of-words (BoW) representation and then with TF-IDF. The goal is to find out whether the words that are specific to a topic matter more (TF-IDF), or whether all words matter (BoW) in topic classification.

In [None]:
# Class balance check: number of rows per category label.
data.category.value_counts()

So we have 5 categories:

- Sport
- Business
- Politics
- Tech
- Entertainment

In [None]:
from gensim.utils import simple_preprocess

In [None]:
# Tokenize every article with gensim's simple_preprocess; min_len=3 is forwarded
# to it as the minimum token length.
# NOTE: this overwrites the `text` column in place with the tokenizer's output,
# so the cell is meant to be run exactly once per kernel session.
data['text'] = data['text'].apply(lambda doc: simple_preprocess(doc, min_len=3))

In [None]:
# Inspect the tokenized text column.
data['text'].head()

Now we need to remove stopwords. Also we have to lemmatize the text.

In [None]:
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer 
  
# English stop words as a set, for O(1) membership checks in stemandstop().
stop_words = set(stopwords.words('english'))

# Created once and shared by every call: stemandstop() runs once per DataFrame
# row, so building a new lemmatizer inside the function is repeated work.
_lemmatizer = WordNetLemmatizer()


def stemandstop(lis):
    """Remove stop words and short tokens, then lemmatize the remaining tokens.

    Despite the name, no stemming is performed: each kept token is passed to
    ``WordNetLemmatizer.lemmatize`` (called without a ``pos`` argument).

    Parameters
    ----------
    lis : iterable of str
        Tokens of a single document.

    Returns
    -------
    list of str
        Lemmatized tokens in their original order. Tokens that appear in
        ``stop_words`` or that have two characters or fewer are dropped
        before lemmatization.
    """
    return [
        _lemmatizer.lemmatize(w)
        for w in lis
        if w not in stop_words and len(w) > 2
    ]

In [None]:
# Replace each token list with its stop-word-filtered, lemmatized version.
data['text'] = data['text'].apply(lambda tokens: stemandstop(tokens))

In [None]:
# Inspect the text column after stop-word removal and lemmatization.
data['text'].head()

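Converting into a BoW model: the token lists are first joined back into plain strings so that the vectorizer can consume them. We start with a Multinomial Naive Bayes baseline and then classify with SVM.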

In [None]:
# Turn each token list back into one space-separated string for the vectorizer.
# NOTE: run once only -- applied to an already-joined string, ' '.join would
# put a space between every character.
data['text'] = data['text'].apply(lambda tokens: ' '.join(tokens))

In [None]:
# First five rows after the text column has been re-joined into strings.
data.head(n=5)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
# Baseline model: token counts -> TF-IDF weighting -> Multinomial Naive Bayes.
nb_steps = [
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
]
text_clf = Pipeline(steps=nb_steps)

In [None]:
# NOTE: text_clf is fitted further down, once the train/test split has been created.

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Features are the preprocessed article strings; targets are the category labels.
X, y = data['text'], data['category']

In [None]:
# Hold out 33% of the articles for evaluation; the fixed random_state makes
# the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Fit the Naive Bayes pipeline on the training split.
text_clf.fit(X=X_train, y=y_train)

In [None]:
# Predict a category for every held-out article.
predictions = text_clf.predict(X=X_test)

In [None]:
# Show the first 20 predicted labels as a quick sanity check.
predictions[0:20]

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Accuracy of the Naive Bayes pipeline on the held-out split.
accuracy_score(y_true=y_test, y_pred=predictions)

# SVM CLASSIFIER

In [None]:
from sklearn.svm import SVC
# SVM on raw bag-of-words counts. The TF-IDF step is left out of this pipeline
# (it was commented out in the original), which fits the notebook's stated
# BoW-vs-TF-IDF comparison.
bow_svm_steps = [
    ('vect', CountVectorizer()),
    ('clf', SVC()),
]
svm_clf = Pipeline(steps=bow_svm_steps)

In [None]:
# Fit the bag-of-words SVM pipeline on the training split.
svm_clf.fit(X=X_train, y=y_train)

In [None]:
# Held-out predictions from the bag-of-words SVM.
svmpred = svm_clf.predict(X=X_test)

In [None]:
# Accuracy of the bag-of-words SVM on the held-out split.
accuracy_score(y_true=y_test, y_pred=svmpred)

In [None]:
# SVM variant that adds TF-IDF weighting on top of the token counts.
tfidf_svm_steps = [
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', SVC()),
]
svm_clftfidf = Pipeline(steps=tfidf_svm_steps)

In [None]:
# Fit the TF-IDF SVM pipeline on the training split.
svm_clftfidf.fit(X=X_train, y=y_train)

In [None]:
# Held-out predictions from the TF-IDF SVM.
tfsvmpred = svm_clftfidf.predict(X=X_test)

In [None]:
# Accuracy of the TF-IDF SVM on the held-out split.
accuracy_score(y_true=y_test, y_pred=tfsvmpred)

In [None]:
# Raw article text used as a live, out-of-dataset test for the classifier.
# Presumably copied from the BBC technology article linked at the end of the
# notebook -- verify against that link. It is passed to the model as-is,
# without the tokenize / stop-word / lemmatize steps applied to the training text.
text = """
Microsoft is planning to bring its Xbox game-streaming service to iPhones next year despite a public row with Apple over the app.
The Game Pass streaming service was launched for Android phones last month.
But Apple barred the app from its iOS app store, saying all 100 or so games on the service should be listed individually.
Microsoft is now developing a web-browser-based version that should work on iPhones.
Microsoft employees have been told that a "browser-based solution" is planned for next year, according to reports in both Business Insider and The Verge.
"We absolutely will end up on iOS," Xbox head Phil Spencer said, according to both outlets.
The streaming service works by offering an interface of games to choose from. Selecting one seamlessly launches the game - assuming the user has a good enough internet connection for the large amount of streaming data needed.
But ahead of the planned launch, Apple said its app store rules require that each game is submitted individually for review.
Since Microsoft plans to add and remove games on the service constantly, it said it did "not have a path" to bring the service to Apple's iOS.
However, Apple released updated guidelines last month which explicitly said that "open internet and web browser apps" are a viable way for game streaming to work, clarifying the rules for Microsoft and other streaming apps.
Microsoft now appears to have opted for this route.
Is game streaming bad for the environment?
Xbox brings blockbuster games to smartphones
Other cloud gaming apps have already chosen a similar path.
Google Stadia has a web-browser-based option for the PC gaming market, and users have found ways to get it running on iOS through third-party apps that are essentially customised web browsers.
And Amazon, which recently announced its Luna game-streaming service, has said it will work on iPhones and iPads at launch through the browser.
"""

In [None]:
# Classify the single live-test article; predict expects a collection of
# documents, hence the one-element list.
live_docs = [text]
svm_clftfidf.predict(live_docs)

# Live Test link
https://www.bbc.com/news/technology-54479124