## Import Libraries and Data

In [1]:
import os
from helper_funcs import get_tokens,get_headers,fetch_top_posts,fetch_last_posts
import pandas as pd 
import numpy as np
import ast
from sklearn.preprocessing import MultiLabelBinarizer
from xgboost import XGBClassifier
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report

In [2]:
PRODUCT_HUNT_API_KEY = os.getenv("PRODUCT_HUNT_API_KEY")
PRODUCT_HUNT_API_SECRET = os.getenv('PRODUCT_HUNT_API_SECRET')

In [3]:
token, token_type = get_tokens()
headers = get_headers(token,token_type)

In [4]:
start_date="2024-08-01T00:00:00Z"
end_date = "2024-08-31T23:59:59Z"

In [5]:
top_posts = fetch_top_posts(start_date=start_date,end_date=end_date,headers=headers, limit =100)
last_posts = fetch_last_posts(start_date=start_date,end_date=end_date,headers=headers, limit =100)

In [6]:
print(f"Name :{top_posts[1]['node']['name']},\nDescription:{top_posts[1]['node']['description']},\nFirst comment:{top_posts[1]['node']['comments']['nodes'][0]['body']}")

Name :Me.bot,
Description:Me.bot captures and connects your thoughts to understand you better, synthesizing a coach for all your life challenges, from a big career move to a small gloomy moment.,
First comment:Hello, I’m Felix Tao, the CEO of Mindverse. Let me introduce <b>Me.bot</b> to you. It is an app I use everyday, for several hours!

Surrounded by centralized AI models, we believe in exploring a path where everyone can <b>train their own personal AI</b>. Everyone deserves an AI defined by them, not by a "Big Brother."

<b>Our solution</b>
Our product, Me.bot, is designed to be a <b>personalized AI companion</b>. It learns and evolves with you, coaching you based on your unique experiences and interactions. You can easily build your memory archive with Me.bot all-compassing multimodal recognition, and Me.bot will connect the dots of your memories to inspire and support you.

<b>Key features</b>
🌟<b>Serendipity</b>: Me.bot learns from you, offering inspiration and advice when you n

In [7]:
print(f"Name :{last_posts[1]['node']['name']},\nDescription:{last_posts[1]['node']['description']},\nFirst comment:{last_posts[1]['node']['comments']['nodes'][0]['body']}")

Name :Startup Accelerator Program,
Description:Startup accelerators provide essential support for early-stage startups, offering expert mentorship, structured growth plans, access to resources, funding opportunities, and valuable networking. These programs help startups refine their business strategies.,
First comment:Startup accelerators are crucial for early-stage ventures, providing expert guidance and essential resources to foster growth. These programs help startups refine strategies, secure funding, and build valuable networks, significantly boosting their chances of success.


## EDA - Top Posts of August

In [8]:
top_posts_df = pd.DataFrame([top_posts[i]['node'] for i in range(len(top_posts))])
top_posts_df.head()

Unnamed: 0,name,description,url,votesCount,createdAt,tagline,commentsCount,comments,topics
0,Wordware (YC S24),Wordware is an IDE that enables anyone to buil...,https://www.producthunt.com/posts/wordware-yc-...,7467,2024-08-02T07:01:00Z,Your tool for building AI agents with natural ...,165,{'nodes': [{'body': '👋🏻 Hi Product Hunt makers...,"{'nodes': [{'slug': 'software-engineering'}, {..."
1,Me.bot,Me.bot captures and connects your thoughts to ...,https://www.producthunt.com/posts/me-bot-2?utm...,2663,2024-08-06T07:01:00Z,The inspiring companion for your life,432,"{'nodes': [{'body': 'Hello, I’m Felix Tao, the...","{'nodes': [{'slug': 'productivity'}, {'slug': ..."
2,10xlaunch,Just add our one-line script to your website a...,https://www.producthunt.com/posts/10xlaunch?ut...,1577,2024-08-27T07:01:00Z,Get 10x more users from same website traffic,206,"{'nodes': [{'body': 'Hey fam, Mo here from 10...","{'nodes': [{'slug': 'sales'}, {'slug': 'artifi..."
3,Flowith,Flowith is the AI for deep work. Surpassing tr...,https://www.producthunt.com/posts/flowith?utm_...,1474,2024-08-07T07:01:00Z,AI for deep work,109,{'nodes': [{'body': 'Hey Product Hunt communit...,"{'nodes': [{'slug': 'productivity'}, {'slug': ..."
4,Brainybear.ai,Build AI Chatbots in 3 Steps and Train in 3 Cl...,https://www.producthunt.com/posts/brainybear-a...,1370,2024-08-06T07:01:00Z,Train AI chatbots in 3 clicks and help custome...,108,"{'nodes': [{'body': 'Hey Hunters, I'm thrille...","{'nodes': [{'slug': 'messaging'}, {'slug': 'ar..."


In [9]:
top_posts_df['date'] = pd.to_datetime(top_posts_df['createdAt']).dt.date
top_posts_df['day'] = pd.to_datetime(top_posts_df['createdAt']).dt.day_name()
top_posts_df['topic_list'] = top_posts_df['topics'].apply(lambda x: [j['slug'] for j in x['nodes']])

In [10]:
px.histogram(top_posts_df, x='day',title='Day of Launch')

Of the top performing posts of August, the common trend is to launch on Weekdays.

In [11]:
px.scatter(top_posts_df, x='day', y='votesCount',title='Vote Count compared to day of Launch')

In [12]:
px.box(top_posts_df, x='day', y='votesCount',title='Vote Count compared to day of Launch')

In [13]:
px.bar(top_posts_df.groupby('day')['votesCount'].mean().sort_values(ascending=False).round(0),title='Average Vote Count vs Day of Launch')

In [14]:
px.bar(top_posts_df[top_posts_df['votesCount']<7000].groupby('day')['votesCount'].mean().sort_values(ascending=False).round(0),title='Average Vote Count(without the outlier) vs Day of Launch')

In [15]:
top_posts_df.groupby('day')['votesCount'].mean().round(0).sort_values(ascending=False)

day
Friday       1191.0
Tuesday       877.0
Wednesday     778.0
Monday        736.0
Thursday      692.0
Saturday      638.0
Sunday        590.0
Name: votesCount, dtype: float64

In [16]:
top_posts_df[top_posts_df['votesCount']<7000].groupby('day')['votesCount'].mean().round(0).sort_values(ascending=False)

day
Tuesday      877.0
Wednesday    778.0
Monday       736.0
Thursday     692.0
Friday       668.0
Saturday     638.0
Sunday       590.0
Name: votesCount, dtype: float64

In [17]:
top_posts_df.groupby('day')['commentsCount'].mean().round(0).sort_values(ascending=False)

day
Tuesday      191.0
Wednesday    175.0
Monday       173.0
Thursday     172.0
Friday       158.0
Saturday     143.0
Sunday       128.0
Name: commentsCount, dtype: float64

In [18]:
px.bar(top_posts_df.groupby('day')['commentsCount'].mean().round(0).sort_values(ascending=False),title='Comments vs Day of Launch')

**Few trends observed:**
* The average votes gained seems to be high on the weekdays rather than on Saturdays and Sundays.
* Also it's worth noting that launch of one startup heavily skewed the average votes data

*One of the reasons behind this trend may be that people like to enjoy their weekends and are not that active on Product Hunt.*

**Also typically, from observation, the launches that do well in the early hours or on the first day of the launch tend to do well throughout. One way to test if this observation holds true is to check the correlation between the votes gained within 24 hours to launch vs total votes gained. But this is not possible as the Product Hunt's GraphQL API only provides total votes at the moment**

In [19]:
px.bar(top_posts_df.explode('topic_list')['topic_list'].value_counts()[:10],title = 'Topics of Top 100 launches of August')

As expected most (64 of 100) of the top launches in August are related to the AI domain

***I have been following product Hunt for a while and one thing that stands out in top performing posts almost always has been the first comment from the maker.***

## EDA II

Let us take a look at the difference between launches that are voted the most and voted the least in the month of August

Here are a few things we can take a look at:
* The description of the product/service
* First comment by the maker: which provides a detailed overview of how the product works and other details

We can use the description to train a classification model to determine whether a launch will be successful based on the number of votes it gets

Although product matters the most when it comes to the votes it gets but how the product is marketed also matters

In [20]:
for i in range(len(top_posts_df[:5])):
    print(top_posts_df.loc[i,'description'])
    print()

Wordware is an IDE that enables anyone to build complex AI Agents and applications. Domain experts and engineers can now iterate 20x faster with prebuilt tools, API deployment, tracing, and more. Finally, build high-quality and reliable AI!

Me.bot captures and connects your thoughts to understand you better, synthesizing a coach for all your life challenges, from a big career move to a small gloomy moment.

Just add our one-line script to your website and know exactly who's visiting your website – get names, emails, and LinkedIn profiles of your anonymous website visitors automatically as soon as they land on your website.

Flowith is the AI for deep work. Surpassing traditional chat-based tools, it streamlines tasks on a multi-thread interface powered by a most advanced agent framework. The intuitive canvas and smart framework boost productivity, helping users stay in the flow.

Build AI Chatbots in 3 Steps and Train in 3 Clicks. Brainybear scans your website or uploaded files to del

In [21]:
last_posts_df = pd.DataFrame([last_posts[i]['node'] for i in range(len(top_posts))])
last_posts_df['date'] = pd.to_datetime(last_posts_df['createdAt']).dt.date
last_posts_df['day'] = pd.to_datetime(last_posts_df['createdAt']).dt.day_name()
last_posts_df['topic_list'] = last_posts_df['topics'].apply(lambda x: [j['slug'] for j in x['nodes']])
last_posts_df.head()

Unnamed: 0,name,description,url,votesCount,createdAt,tagline,commentsCount,comments,topics,date,day,topic_list
0,MyBranz,Discover authentic reviews and aggregated shop...,https://www.producthunt.com/posts/mybranz-2?ut...,1,2024-08-05T20:09:33Z,Authentic Reviews & Aggregated Shopping Insights,1,{'nodes': [{'body': 'We're thrilled to announc...,{'nodes': [{'slug': 'artificial-intelligence'}...,2024-08-05,Monday,"[artificial-intelligence, e-commerce]"
1,Startup Accelerator Program,Startup accelerators provide essential support...,https://www.producthunt.com/posts/startup-acce...,1,2024-08-06T06:40:57Z,Startup Accelerator Program Will Boost Your Bu...,1,{'nodes': [{'body': 'Startup accelerators are ...,"{'nodes': [{'slug': 'fintech'}, {'slug': 'inve...",2024-08-06,Tuesday,"[fintech, investing, business]"
2,Efficient Solution with Medical Billing,Are you struggling with your practice's billin...,https://www.producthunt.com/posts/efficient-so...,1,2024-08-07T04:42:28Z,medical billing consultant,0,{'nodes': []},"{'nodes': [{'slug': 'medical'}, {'slug': 'cons...",2024-08-07,Wednesday,"[medical, consulting, health]"
3,Apps 365,"Harmonizing with Microsoft 365, and Showcasing...",https://www.producthunt.com/posts/apps-365?utm...,1,2024-08-07T08:53:06Z,Business & HR Solutions Redefined With AI Auto...,0,{'nodes': []},"{'nodes': [{'slug': 'productivity'}, {'slug': ...",2024-08-07,Wednesday,"[productivity, saas]"
4,Cook Book,"Cook Book - A simplified, customizable desktop...",https://www.producthunt.com/posts/cook-book?ut...,1,2024-08-06T21:00:32Z,"Your Simplified App for Recipes, Meal Plans, G...",1,{'nodes': [{'body': 'Hey Product Hunt! 🎉 We’re...,"{'nodes': [{'slug': 'cookbooks'}, {'slug': 'co...",2024-08-06,Tuesday,"[cookbooks, cooking, design]"


In [22]:
for i in range(len(last_posts_df[:5])):
    print(last_posts_df.loc[i,'description'])
    print()

Discover authentic reviews and aggregated shopping insights on MyBranz. Our platform ensures transparency with AI-driven review verification, comprehensive review summaries, and brand pledges against fake reviews. Shop smart and informed with MyBranz.

Startup accelerators provide essential support for early-stage startups, offering expert mentorship, structured growth plans, access to resources, funding opportunities, and valuable networking. These programs help startups refine their business strategies.

Are you struggling with your practice's billing and coding? Medical Billing Consultants offer expert services to streamline your billing processes, ensure accurate coding, and enhance your revenue cycle management.

Harmonizing with Microsoft 365, and Showcasing a Mesmerizing UI

Cook Book - A simplified, customizable desktop app. - Easy recipe management with beautiful recipe cards. - Organize meals with a dynamic calendar. - Download or share interactive grocery lists. One-time buy

In [23]:
top_posts_df.iloc[0]['comments']['nodes'][0]['body']

'👋🏻 Hi Product Hunt makers!\n\nI’m Kamil, Head of Growth at Wordware—an IDE for building AI agents. Today, we’re officially launching the Wordware platform, and we’re excited to show the world what we built.\n\nIt’s a tool (an IDE) that enables you to quickly build custom AI agents for specific use cases like legal contract generation, marketing content automation, invoice analysis, candidate screening, generating PRDs, and many more. We call applications built on Wordware ‘WordApps’ because you can create them using natural language—in other words, using words (pun intended).\n\nOur core belief is that the domain expert—not the engineer—knows what good LLM output looks like. For example, lawyers building legal SaaS need to be deeply involved in the process, and working directly in the codebase or going back-and-forth with engineers isn’t the way to go.\n\nMost of our clients are cross-functional teams, including less technical members, who need to collaborate with engineers on LLM app

In [24]:
print(top_posts_df.iloc[0]['comments']['nodes'][0]['body'])

👋🏻 Hi Product Hunt makers!

I’m Kamil, Head of Growth at Wordware—an IDE for building AI agents. Today, we’re officially launching the Wordware platform, and we’re excited to show the world what we built.

It’s a tool (an IDE) that enables you to quickly build custom AI agents for specific use cases like legal contract generation, marketing content automation, invoice analysis, candidate screening, generating PRDs, and many more. We call applications built on Wordware ‘WordApps’ because you can create them using natural language—in other words, using words (pun intended).

Our core belief is that the domain expert—not the engineer—knows what good LLM output looks like. For example, lawyers building legal SaaS need to be deeply involved in the process, and working directly in the codebase or going back-and-forth with engineers isn’t the way to go.

Most of our clients are cross-functional teams, including less technical members, who need to collaborate with engineers on LLM applications

In [25]:
print(top_posts_df.iloc[1]['comments']['nodes'][0]['body'])

Hello, I’m Felix Tao, the CEO of Mindverse. Let me introduce <b>Me.bot</b> to you. It is an app I use everyday, for several hours!

Surrounded by centralized AI models, we believe in exploring a path where everyone can <b>train their own personal AI</b>. Everyone deserves an AI defined by them, not by a "Big Brother."

<b>Our solution</b>
Our product, Me.bot, is designed to be a <b>personalized AI companion</b>. It learns and evolves with you, coaching you based on your unique experiences and interactions. You can easily build your memory archive with Me.bot all-compassing multimodal recognition, and Me.bot will connect the dots of your memories to inspire and support you.

<b>Key features</b>
🌟<b>Serendipity</b>: Me.bot learns from you, offering inspiration and advice when you need it most.
🧠<b>Second Brain</b>: Me.bot helps you understand yourself better and presents its insights into you.
💬<b>Speak to Remind</b>: Set reminders with your voice—it's easier than ever.
📁<b>Smart Topics<

In [26]:
[last_posts_df.iloc[12]['comments']['nodes'][0]['body'] if(last_posts_df.iloc[12]['comments']['nodes']) else None]

[None]

In [27]:
top_first_comments_len = [len(top_posts[i]['node']['comments']['nodes'][0]['body']) for i in range(len(top_posts))]

In [28]:
px.histogram(top_first_comments_len)

In [29]:
np.array(top_first_comments_len).mean()

1595.18

In [30]:
last_first_comments_len=[len(last_posts[i]['node']['comments']['nodes'][0]['body']) 
 if last_posts[i]['node']['comments']['nodes'] else 0
 for i in range(len(last_posts))]

In [31]:
px.histogram(last_first_comments_len)

*As we can see from couple of example from both types of posts, the first comment from the maker is very detailed in top performing launches whereas the first comments in case of the launches with least amount of votes the comment is not very detailed and sometimes there is no comment from the maker*

## Classification Model

In [41]:
df = pd.read_csv('PH_posts_data.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,name,description,url,votesCount,createdAt,tagline,commentsCount,comments,topics
0,0,Not Diamond,Not Diamond isn’t like other chatbots you’ve u...,https://www.producthunt.com/posts/not-diamond?...,691,2024-08-01T07:01:00Z,The last chatbot you’ll ever need,219,{'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n...,"{'nodes': [{'slug': 'developer-tools'}, {'slug..."
1,1,Clarity,Clarity is purpose-built for founder-led sales...,https://www.producthunt.com/posts/clarity-9f37...,545,2024-08-01T07:01:00Z,A meeting recorder for founder-led sales,199,"{'nodes': [{'body': ""Congrats on the launch Au...","{'nodes': [{'slug': 'productivity'}, {'slug': ..."
2,2,Mito Health,Mito Health uses blood work at regular labs to...,https://www.producthunt.com/posts/mito-health?...,358,2024-08-01T07:01:00Z,Better insights from bloodwork,117,{'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe...,"{'nodes': [{'slug': 'health-fitness'}, {'slug'..."
3,3,EduWiz.AI,Improve your writing effortlessly with EduWiz....,https://www.producthunt.com/posts/eduwiz-ai?ut...,292,2024-08-01T07:01:00Z,Write magical paperwork in seconds with AI,105,"{'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI...","{'nodes': [{'slug': 'writing'}, {'slug': 'educ..."
4,4,Mind Visuals,Stay in your creator zone and edit videos in s...,https://www.producthunt.com/posts/mind-visuals...,278,2024-08-01T07:01:00Z,Drag and drop animations for creators,61,{'nodes': [{'body': 'Mind Visuals is now live!...,"{'nodes': [{'slug': 'design-tools'}, {'slug': ..."


In [42]:
df.drop('Unnamed: 0',axis=1,inplace=True)
df['Date'] = pd.to_datetime(df['createdAt']).dt.date
# df['date'] = pd.to_datetime(df['createdAt']).dt.date
df['day'] = pd.to_datetime(df['createdAt']).dt.day_name()
df['topic_list'] = df['topics'].apply(lambda x: [j['slug'] for j in ast.literal_eval(x)['nodes']])
df.head()

Unnamed: 0,name,description,url,votesCount,createdAt,tagline,commentsCount,comments,topics,Date,day,topic_list
0,Not Diamond,Not Diamond isn’t like other chatbots you’ve u...,https://www.producthunt.com/posts/not-diamond?...,691,2024-08-01T07:01:00Z,The last chatbot you’ll ever need,219,{'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n...,"{'nodes': [{'slug': 'developer-tools'}, {'slug...",2024-08-01,Thursday,"[developer-tools, artificial-intelligence, bots]"
1,Clarity,Clarity is purpose-built for founder-led sales...,https://www.producthunt.com/posts/clarity-9f37...,545,2024-08-01T07:01:00Z,A meeting recorder for founder-led sales,199,"{'nodes': [{'body': ""Congrats on the launch Au...","{'nodes': [{'slug': 'productivity'}, {'slug': ...",2024-08-01,Thursday,"[productivity, sales, artificial-intelligence]"
2,Mito Health,Mito Health uses blood work at regular labs to...,https://www.producthunt.com/posts/mito-health?...,358,2024-08-01T07:01:00Z,Better insights from bloodwork,117,{'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe...,"{'nodes': [{'slug': 'health-fitness'}, {'slug'...",2024-08-01,Thursday,"[health-fitness, artificial-intelligence, life..."
3,EduWiz.AI,Improve your writing effortlessly with EduWiz....,https://www.producthunt.com/posts/eduwiz-ai?ut...,292,2024-08-01T07:01:00Z,Write magical paperwork in seconds with AI,105,"{'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI...","{'nodes': [{'slug': 'writing'}, {'slug': 'educ...",2024-08-01,Thursday,"[writing, education, artificial-intelligence]"
4,Mind Visuals,Stay in your creator zone and edit videos in s...,https://www.producthunt.com/posts/mind-visuals...,278,2024-08-01T07:01:00Z,Drag and drop animations for creators,61,{'nodes': [{'body': 'Mind Visuals is now live!...,"{'nodes': [{'slug': 'design-tools'}, {'slug': ...",2024-08-01,Thursday,"[design-tools, marketing, video]"


In [43]:
px.box(df['votesCount'])

Generally anything above 200 upvotes on Product Hunt has a chance of becoming product of the day and 200-300 upvotes is considered a good lauch.

In [44]:
df[df['votesCount']>200].shape

(239, 12)

In [45]:
def categorize_votes(vote):
    if vote > 150:
        return 'Success'
    elif vote > 50:
        return 'Average'
    else:
        return 'low'

df['vote_category'] = df['votesCount'].apply(categorize_votes)

In [46]:
first_comments=[ast.literal_eval(df['comments'][i])['nodes'][0]['body'] 
 if len(ast.literal_eval(df['comments'][i])['nodes']) else None
 for i in range(len(df))]

In [47]:
df['first_comments'] = first_comments
df['first_comments_len']=[len(first_comments[i]) if first_comments[i] else 0 for i in range(len(df['first_comments']))]

In [48]:
ast.literal_eval(df['topics'][0])['nodes']

[{'slug': 'developer-tools'},
 {'slug': 'artificial-intelligence'},
 {'slug': 'bots'}]

In [49]:
df[['votesCount','commentsCount','first_comments_len']].corr()

Unnamed: 0,votesCount,commentsCount,first_comments_len
votesCount,1.0,0.701128,0.246992
commentsCount,0.701128,1.0,0.298725
first_comments_len,0.246992,0.298725,1.0


In [50]:
len(list(sorted(set(df['topic_list'].explode().values))))

295

In [51]:
mlb = MultiLabelBinarizer()
topics_one_hot = mlb.fit_transform(df['topic_list'])

In [52]:
px.histogram([len(top_posts_df['description'][i]) for i in range(len(top_posts_df))])

In [53]:
px.histogram([len(last_posts_df['description'][i]) for i in range(len(last_posts_df))])

In [54]:
topics_one_hot_df = pd.DataFrame(topics_one_hot, columns=mlb.classes_)
df = pd.concat([df, topics_one_hot_df], axis=1)
df.head()


Unnamed: 0,name,description,url,votesCount,createdAt,tagline,commentsCount,comments,topics,Date,...,weather,web-app,web-design,web3,website-builder,wi-fi,word-games,wordpress,writing,youtube
0,Not Diamond,Not Diamond isn’t like other chatbots you’ve u...,https://www.producthunt.com/posts/not-diamond?...,691,2024-08-01T07:01:00Z,The last chatbot you’ll ever need,219,{'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n...,"{'nodes': [{'slug': 'developer-tools'}, {'slug...",2024-08-01,...,0,0,0,0,0,0,0,0,0,0
1,Clarity,Clarity is purpose-built for founder-led sales...,https://www.producthunt.com/posts/clarity-9f37...,545,2024-08-01T07:01:00Z,A meeting recorder for founder-led sales,199,"{'nodes': [{'body': ""Congrats on the launch Au...","{'nodes': [{'slug': 'productivity'}, {'slug': ...",2024-08-01,...,0,0,0,0,0,0,0,0,0,0
2,Mito Health,Mito Health uses blood work at regular labs to...,https://www.producthunt.com/posts/mito-health?...,358,2024-08-01T07:01:00Z,Better insights from bloodwork,117,{'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe...,"{'nodes': [{'slug': 'health-fitness'}, {'slug'...",2024-08-01,...,0,0,0,0,0,0,0,0,0,0
3,EduWiz.AI,Improve your writing effortlessly with EduWiz....,https://www.producthunt.com/posts/eduwiz-ai?ut...,292,2024-08-01T07:01:00Z,Write magical paperwork in seconds with AI,105,"{'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI...","{'nodes': [{'slug': 'writing'}, {'slug': 'educ...",2024-08-01,...,0,0,0,0,0,0,0,0,1,0
4,Mind Visuals,Stay in your creator zone and edit videos in s...,https://www.producthunt.com/posts/mind-visuals...,278,2024-08-01T07:01:00Z,Drag and drop animations for creators,61,{'nodes': [{'body': 'Mind Visuals is now live!...,"{'nodes': [{'slug': 'design-tools'}, {'slug': ...",2024-08-01,...,0,0,0,0,0,0,0,0,0,0


In [55]:
daytoint = {'Monday': 1,
 'Tuesday': 2,
 'Wednesday': 3,
 'Thursday': 4,
 'Friday': 5,
 'Saturday': 6,
 'Sunday': 7,
}

vote_category_dict ={'low':0,
    'Average':1,
 'Success':2}

In [56]:
df['daytoint']= df['day'].map(daytoint)
df['description_len']= df['description'].apply(lambda i: len(i))

In [57]:
x= df.drop(list(df.dtypes[df.dtypes==object].index)+['votesCount','commentsCount'],axis=1)
y = df['vote_category']
x.shape,y.shape

((1860, 298), (1860,))

In [58]:
x_train,x_test,y_train,y_test = train_test_split(x,y.map(vote_category_dict),test_size = 0.2)
x_train.shape,y_train.shape,x_test.shape,y_test.shape

((1488, 298), (1488,), (372, 298), (372,))

In [59]:
y.map(vote_category_dict).value_counts()

vote_category
0    1060
1     458
2     342
Name: count, dtype: int64

In [60]:
xgb= XGBClassifier(max_depth =3,max_leaves =4,learning_rate =0.1)
xgb.fit(x_train,y_train)

In [61]:
print(f'Training Accuracy: {accuracy_score(xgb.predict(x_train),y_train)}')
print(f'Training Confusion matrix: {confusion_matrix(xgb.predict(x_train),y_train)}')

Training Accuracy: 0.6424731182795699
Training Confusion matrix: [[768 239 174]
 [ 38 115  30]
 [ 25  26  73]]


In [62]:
print(f'Test Accuracy: {accuracy_score(xgb.predict(x_test),y_test)}')
print(f'Test Confusion matrix: {confusion_matrix(xgb.predict(x_test),y_test)}')

Test Accuracy: 0.5752688172043011
Test Confusion matrix: [[196  63  44]
 [ 20   8  11]
 [ 13   7  10]]


In [63]:
print(classification_report(xgb.predict(x_test),y_test))

              precision    recall  f1-score   support

           0       0.86      0.65      0.74       303
           1       0.10      0.21      0.14        39
           2       0.15      0.33      0.21        30

    accuracy                           0.58       372
   macro avg       0.37      0.40      0.36       372
weighted avg       0.72      0.58      0.63       372

