In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from io import StringIO
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import chi2
import numpy as np

# Load the training tweets, supplying the column labels explicitly.
TWEET_COLUMNS = ["userID", "tweets"]
df = pd.read_csv("enhanced_train_tweets.csv", names=TWEET_COLUMNS)

# Preview the first rows.
df.head()

Unnamed: 0,userID,tweets
0,8746,lets try and catch up live next week going to...
1,2423,do not pay for white teeth moms whitening meth...
2,564,treat is awesome but trick is more fun you can...
3,3039,not a cool joke i got pretty pissed for a spl...
4,9661,ford vehicles called worldclass ford vehicles...


In [2]:
# Keep only the rows whose tweet text is present.
has_tweet_text = df['tweets'].notna()
df = df.loc[has_tweet_text]

In [3]:
# Summarise the frame (index range, per-column non-null counts, dtypes, memory).
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9297 entries, 0 to 9296
Data columns (total 2 columns):
userID    9297 non-null int64
tweets    9297 non-null object
dtypes: int64(1), object(1)
memory usage: 217.9+ KB


In [4]:
# Restrict the frame to the label column (userID) and the text column (tweets).
col = ['userID', 'tweets']
df = df[col]

In [None]:
# Display the current column labels.
df.columns

Index(['userID', 'tweets'], dtype='object')

In [None]:
# Name mapping kept for reference: 'Product' corresponds to 'userID' and
# 'Consumer_complaint_narrative' to 'tweets' (presumably the column names of
# the example this notebook was adapted from - confirm).
df.columns = ['userID', 'tweets']

In [None]:
# Encode each distinct userID as an integer code, in order of first appearance.
user_codes, _unique_users = df['userID'].factorize()
df['category_id'] = user_codes

# One row per (userID, category_id) pair, ordered by the integer code.
category_id_df = (
    df[['userID', 'category_id']]
    .drop_duplicates()
    .sort_values('category_id')
)

# Lookup tables in both directions: userID -> code and code -> userID.
category_to_id = {user: code for user, code in category_id_df.values}
id_to_category = {
    code: user
    for code, user in category_id_df[['category_id', 'userID']].values
}

In [None]:
# Preview the frame again, now including the derived category_id column.
df.head()

Unnamed: 0,userID,tweets,category_id
0,8746,lets try and catch up live next week going to...,0
1,2423,do not pay for white teeth moms whitening meth...,1
2,564,treat is awesome but trick is more fun you can...,2
3,3039,not a cool joke i got pretty pissed for a spl...,3
4,9661,ford vehicles called worldclass ford vehicles...,4


In [None]:
# Tf-idf over unigrams and bigrams: sublinear term frequency, L2-normalised
# rows, English stop words removed, terms seen in fewer than 5 tweets dropped.
tfidf = TfidfVectorizer(
    sublinear_tf=True,
    min_df=5,
    norm='l2',
    encoding='latin-1',
    ngram_range=(1, 2),
    stop_words='english',
)

tfidf_matrix = tfidf.fit_transform(df['tweets'])
# NOTE(review): .toarray() materialises the full dense matrix; at the
# (9297, 57653) shape shown below that is on the order of several GB.
features = tfidf_matrix.toarray()
labels = df['category_id']
features.shape

(9297, 57653)

In [None]:
N = 2  # NOTE(review): never read in this cell; unigrams/bigrams are computed but not reported

# The fitted vocabulary is the same for every class, so build the array of
# feature names once instead of on every iteration. get_feature_names() was
# removed in scikit-learn 1.2; prefer get_feature_names_out() and fall back
# to the old method on releases that predate it.
if hasattr(tfidf, "get_feature_names_out"):
    all_feature_names = np.asarray(tfidf.get_feature_names_out())
else:
    all_feature_names = np.array(tfidf.get_feature_names())

check = 0
# `Product` holds a userID here: category_to_id maps userID -> category_id.
for Product, category_id in sorted(category_to_id.items()):
    print ("check",check,"catid",category_id)
    # One-vs-rest chi-squared score of every feature against "row belongs to this category".
    features_chi2 = chi2(features, labels == category_id)
    # Ascending sort: the highest-scoring terms end up at the tail.
    indices = np.argsort(features_chi2[0])
    feature_names = all_feature_names[indices]
    # Split the ranked terms by n-gram length (terms are space-separated tokens).
    unigrams = [v for v in feature_names if len(v.split(' ')) == 1]
    bigrams = [v for v in feature_names if len(v.split(' ')) == 2]
    check += 1

check 0 catid 9283
check 1 catid 7849
check 2 catid 7230
check 3 catid 5396
check 4 catid 5110
check 5 catid 1005
check 6 catid 5484
check 7 catid 3596
check 8 catid 7875
check 9 catid 3489
check 10 catid 5342
check 11 catid 2053
check 12 catid 1082
check 13 catid 1979
check 14 catid 3361
check 15 catid 3247
check 16 catid 4441
check 17 catid 6606
check 18 catid 992
check 19 catid 7974
check 20 catid 6366
check 21 catid 845
check 22 catid 2613
check 23 catid 2757
check 24 catid 1627
check 25 catid 58
check 26 catid 8156
check 27 catid 4410
check 28 catid 4238
check 29 catid 6670
check 30 catid 1462
check 31 catid 1516
check 32 catid 192
check 33 catid 6481
check 34 catid 1750
check 35 catid 8366
check 36 catid 6379
check 37 catid 3888
check 38 catid 4057
check 39 catid 6207
check 40 catid 9178
check 41 catid 8407
check 42 catid 5914
check 43 catid 2917
check 44 catid 1761
check 45 catid 5714
check 46 catid 5815
check 47 catid 3715
check 48 catid 3919
check 49 catid 2081
check 50 catid 

check 397 catid 1702
check 398 catid 4403
check 399 catid 3692
check 400 catid 8905
check 401 catid 8966
check 402 catid 3663
check 403 catid 351
check 404 catid 1027
check 405 catid 3571
check 406 catid 5730
check 407 catid 4093
check 408 catid 3598
check 409 catid 9014
check 410 catid 651
check 411 catid 4436
check 412 catid 9291
check 413 catid 7901
check 414 catid 7727
check 415 catid 459
check 416 catid 7212
check 417 catid 3760
check 418 catid 5249
check 419 catid 1563
check 420 catid 7721
check 421 catid 8140
check 422 catid 3082
check 423 catid 8960
check 424 catid 9265
check 425 catid 6771
check 426 catid 3371
check 427 catid 8632
check 428 catid 5946
check 429 catid 2339
check 430 catid 1846
check 431 catid 3786
check 432 catid 6544
check 433 catid 8073
check 434 catid 1854
check 435 catid 7084
check 436 catid 5077
check 437 catid 21
check 438 catid 115
check 439 catid 220
check 440 catid 1312
check 441 catid 309
check 442 catid 1876
check 443 catid 8702
check 444 catid 6059


check 791 catid 8990
check 792 catid 1046
check 793 catid 4077
check 794 catid 6259
check 795 catid 5074
check 796 catid 2661
check 797 catid 7206
check 798 catid 8917
check 799 catid 2222
check 800 catid 1436
check 801 catid 4195
check 802 catid 8722
check 803 catid 2819
check 804 catid 7367
check 805 catid 3681
check 806 catid 9028
check 807 catid 5575
check 808 catid 394
check 809 catid 5424
check 810 catid 737
check 811 catid 4018
check 812 catid 2548
check 813 catid 3743
check 814 catid 3237
check 815 catid 8050
check 816 catid 3417
check 817 catid 4632
check 818 catid 2189
check 819 catid 709
check 820 catid 3457
check 821 catid 494
check 822 catid 925
check 823 catid 389
check 824 catid 2871
check 825 catid 7523
check 826 catid 147
check 827 catid 6564
check 828 catid 5028
check 829 catid 5601
check 830 catid 249
check 831 catid 256
check 832 catid 7273
check 833 catid 7703
check 834 catid 8287
check 835 catid 428
check 836 catid 6833
check 837 catid 7657
check 838 catid 8383
ch

check 1176 catid 1344
check 1177 catid 1680
check 1178 catid 6310
check 1179 catid 2300
check 1180 catid 2454
check 1181 catid 7695
check 1182 catid 3186
check 1183 catid 7708
check 1184 catid 7981
check 1185 catid 3220
check 1186 catid 2699
check 1187 catid 69
check 1188 catid 1570
check 1189 catid 8580
check 1190 catid 4120
check 1191 catid 8527
check 1192 catid 5588
check 1193 catid 3412
check 1194 catid 5720
check 1195 catid 3253
check 1196 catid 432
check 1197 catid 5736
check 1198 catid 4996
check 1199 catid 3898
check 1200 catid 7324
check 1201 catid 2741
check 1202 catid 2749
check 1203 catid 6391
check 1204 catid 5565
check 1205 catid 4757
check 1206 catid 5791
check 1207 catid 4754
check 1208 catid 3662
check 1209 catid 3445
check 1210 catid 7992
check 1211 catid 5246
check 1212 catid 7254
check 1213 catid 4264
check 1214 catid 820
check 1215 catid 581
check 1216 catid 2321
check 1217 catid 5304
check 1218 catid 4650
check 1219 catid 6471
check 1220 catid 2427
check 1221 cati

check 1551 catid 629
check 1552 catid 7115
check 1553 catid 8138
check 1554 catid 4948
check 1555 catid 2626
check 1556 catid 7912
check 1557 catid 4848
check 1558 catid 7165
check 1559 catid 6553
check 1560 catid 3176
check 1561 catid 7923
check 1562 catid 6300
check 1563 catid 3041
check 1564 catid 2542
check 1565 catid 9148
check 1566 catid 970
check 1567 catid 8237
check 1568 catid 5393
check 1569 catid 3373
check 1570 catid 8152
check 1571 catid 444
check 1572 catid 2097
check 1573 catid 9074
check 1574 catid 8823
check 1575 catid 8906
check 1576 catid 6410
check 1577 catid 3828
check 1578 catid 8255
check 1579 catid 3056
check 1580 catid 3718
check 1581 catid 5294
check 1582 catid 9154
check 1583 catid 3376
check 1584 catid 6337
check 1585 catid 179
check 1586 catid 1358
check 1587 catid 5739
check 1588 catid 2594
check 1589 catid 5363
check 1590 catid 2633
check 1591 catid 922
check 1592 catid 1561
check 1593 catid 548
check 1594 catid 13
check 1595 catid 8804
check 1596 catid 8

check 1926 catid 4373
check 1927 catid 1515
check 1928 catid 2459
check 1929 catid 6280
check 1930 catid 4718
check 1931 catid 9146
check 1932 catid 4607
check 1933 catid 4020
check 1934 catid 1349
check 1935 catid 1938
check 1936 catid 3217
check 1937 catid 395
check 1938 catid 3242
check 1939 catid 1747
check 1940 catid 1834
check 1941 catid 6693
check 1942 catid 8006
check 1943 catid 8698
check 1944 catid 6839
check 1945 catid 8543
check 1946 catid 1155
check 1947 catid 894
check 1948 catid 7835
check 1949 catid 566
check 1950 catid 2569
check 1951 catid 478
check 1952 catid 4303
check 1953 catid 8034
check 1954 catid 3690
check 1955 catid 2963
check 1956 catid 5421
check 1957 catid 5116
check 1958 catid 6318
check 1959 catid 2736
check 1960 catid 1912
check 1961 catid 7483
check 1962 catid 5090
check 1963 catid 1470
check 1964 catid 8982
check 1965 catid 4709
check 1966 catid 2823
check 1967 catid 8436
check 1968 catid 2665
check 1969 catid 1855
check 1970 catid 936
check 1971 cati

check 2302 catid 4061
check 2303 catid 7444
check 2304 catid 6979
check 2305 catid 8914
check 2306 catid 2889
check 2307 catid 86
check 2308 catid 5099
check 2309 catid 7493
check 2310 catid 6888
check 2311 catid 7932
check 2312 catid 8912
check 2313 catid 5953
check 2314 catid 1533
check 2315 catid 5561
check 2316 catid 1586
check 2317 catid 1181
check 2318 catid 8461
check 2319 catid 1504
check 2320 catid 8490
check 2321 catid 6587
check 2322 catid 2597
check 2323 catid 8135
check 2324 catid 6926
check 2325 catid 2806
check 2326 catid 156
check 2327 catid 6353
check 2328 catid 3232
check 2329 catid 7340
check 2330 catid 3813
check 2331 catid 5328
check 2332 catid 2442
check 2333 catid 8877
check 2334 catid 6816
check 2335 catid 4254
check 2336 catid 2424
check 2337 catid 6226
check 2338 catid 686
check 2339 catid 5073
check 2340 catid 2824
check 2341 catid 3810
check 2342 catid 8681
check 2343 catid 2450
check 2344 catid 917
check 2345 catid 7605
check 2346 catid 7994
check 2347 cati

check 2677 catid 5415
check 2678 catid 8649
check 2679 catid 735
check 2680 catid 7954
check 2681 catid 531
check 2682 catid 628
check 2683 catid 5423
check 2684 catid 2417
check 2685 catid 8270
check 2686 catid 1025
check 2687 catid 5865
check 2688 catid 8200
check 2689 catid 5133
check 2690 catid 8469
check 2691 catid 8739
check 2692 catid 7167
check 2693 catid 2278
check 2694 catid 4704
check 2695 catid 6736
check 2696 catid 2923
check 2697 catid 3856
check 2698 catid 8879
check 2699 catid 1932
check 2700 catid 8666
check 2701 catid 5139
check 2702 catid 2723
check 2703 catid 3441
check 2704 catid 9034
check 2705 catid 7454
check 2706 catid 6056
check 2707 catid 515
check 2708 catid 6156
check 2709 catid 6255
check 2710 catid 5855
check 2711 catid 2346
check 2712 catid 7236
check 2713 catid 2147
check 2714 catid 1905
check 2715 catid 6225
check 2716 catid 6650
check 2717 catid 4883
check 2718 catid 8269
check 2719 catid 2451
check 2720 catid 2765
check 2721 catid 2962
check 2722 cat

check 3052 catid 2433
check 3053 catid 8730
check 3054 catid 7414
check 3055 catid 3905
check 3056 catid 5478
check 3057 catid 968
check 3058 catid 7742
check 3059 catid 2678
check 3060 catid 9040
check 3061 catid 2753
check 3062 catid 7181
check 3063 catid 3212
check 3064 catid 2770
check 3065 catid 427
check 3066 catid 7894
check 3067 catid 1236
check 3068 catid 2443
check 3069 catid 4694
check 3070 catid 3592
check 3071 catid 6788
check 3072 catid 5000
check 3073 catid 9031
check 3074 catid 6427
check 3075 catid 8236
check 3076 catid 5402
check 3077 catid 3739
check 3078 catid 803
check 3079 catid 3037
check 3080 catid 9013
check 3081 catid 2448
check 3082 catid 5657
check 3083 catid 4850
check 3084 catid 7755
check 3085 catid 1380
check 3086 catid 1202
check 3087 catid 5311
check 3088 catid 5426
check 3089 catid 8027
check 3090 catid 2969
check 3091 catid 282
check 3092 catid 8571
check 3093 catid 8004
check 3094 catid 4214
check 3095 catid 6038
check 3096 catid 692
check 3097 cati

check 3427 catid 2292
check 3428 catid 8769
check 3429 catid 3187
check 3430 catid 1275
check 3431 catid 2589
check 3432 catid 4253
check 3433 catid 4536
check 3434 catid 1648
check 3435 catid 8621
check 3436 catid 1630
check 3437 catid 8174
check 3438 catid 7362
check 3439 catid 5439
check 3440 catid 3808
check 3441 catid 1913
check 3442 catid 2875
check 3443 catid 825
check 3444 catid 6682
check 3445 catid 7092
check 3446 catid 8538
check 3447 catid 7782
check 3448 catid 6376
check 3449 catid 2455
check 3450 catid 8709
check 3451 catid 1282
check 3452 catid 3645
check 3453 catid 9199
check 3454 catid 9004
check 3455 catid 9095
check 3456 catid 5307
check 3457 catid 1260
check 3458 catid 437
check 3459 catid 8898
check 3460 catid 5773
check 3461 catid 1962
check 3462 catid 5702
check 3463 catid 120
check 3464 catid 7669
check 3465 catid 7416
check 3466 catid 3020
check 3467 catid 5778
check 3468 catid 3595
check 3469 catid 4794
check 3470 catid 3722
check 3471 catid 1863
check 3472 ca

check 3802 catid 8384
check 3803 catid 2677
check 3804 catid 3479
check 3805 catid 900
check 3806 catid 7499
check 3807 catid 4640
check 3808 catid 1265
check 3809 catid 224
check 3810 catid 2342
check 3811 catid 3832
check 3812 catid 7804
check 3813 catid 3954
check 3814 catid 5900
check 3815 catid 9152
check 3816 catid 4385
check 3817 catid 1180
check 3818 catid 345
check 3819 catid 7783
check 3820 catid 3533
check 3821 catid 8561
check 3822 catid 8771
check 3823 catid 7357
check 3824 catid 1730
check 3825 catid 7610
check 3826 catid 7498
check 3827 catid 8957
check 3828 catid 1214
check 3829 catid 6459
check 3830 catid 6262
check 3831 catid 4498
check 3832 catid 3099
check 3833 catid 4285
check 3834 catid 3238
check 3835 catid 3151
check 3836 catid 7004
check 3837 catid 342
check 3838 catid 9145
check 3839 catid 6834
check 3840 catid 201
check 3841 catid 3608
check 3842 catid 3696
check 3843 catid 6465
check 3844 catid 1989
check 3845 catid 1297
check 3846 catid 7127
check 3847 cati

check 4177 catid 2546
check 4178 catid 3811
check 4179 catid 4789
check 4180 catid 8134
check 4181 catid 4177
check 4182 catid 5885
check 4183 catid 4776
check 4184 catid 4395
check 4185 catid 5982
check 4186 catid 7743
check 4187 catid 1550
check 4188 catid 7203
check 4189 catid 2862
check 4190 catid 980
check 4191 catid 2309
check 4192 catid 9198
check 4193 catid 1966
check 4194 catid 1364
check 4195 catid 5194
check 4196 catid 6460
check 4197 catid 832
check 4198 catid 7468
check 4199 catid 1469
check 4200 catid 4662
check 4201 catid 897
check 4202 catid 4621
check 4203 catid 2382
check 4204 catid 6679
check 4205 catid 9107
check 4206 catid 8655
check 4207 catid 1928
check 4208 catid 8198
check 4209 catid 4686
check 4210 catid 5106
check 4211 catid 3962
check 4212 catid 5856
check 4213 catid 9106
check 4214 catid 2122
check 4215 catid 7689
check 4216 catid 9003
check 4217 catid 2407
check 4218 catid 3841
check 4219 catid 1574
check 4220 catid 238
check 4221 catid 2502
check 4222 cat

check 4552 catid 2685
check 4553 catid 5267
check 4554 catid 7778
check 4555 catid 5293
check 4556 catid 7390
check 4557 catid 4940
check 4558 catid 5752
check 4559 catid 579
check 4560 catid 5518
check 4561 catid 5803
check 4562 catid 8896
check 4563 catid 7740
check 4564 catid 1212
check 4565 catid 8595
check 4566 catid 2350
check 4567 catid 673
check 4568 catid 50
check 4569 catid 9252
check 4570 catid 511
check 4571 catid 4220
check 4572 catid 2371
check 4573 catid 4957
check 4574 catid 1449
check 4575 catid 4023
check 4576 catid 3290
check 4577 catid 812
check 4578 catid 2836
check 4579 catid 3049
check 4580 catid 3121
check 4581 catid 1558
check 4582 catid 4956
check 4583 catid 470
check 4584 catid 8589
check 4585 catid 2180
check 4586 catid 8071
check 4587 catid 9029
check 4588 catid 3735
check 4589 catid 6977
check 4590 catid 8688
check 4591 catid 5255
check 4592 catid 8391
check 4593 catid 8878
check 4594 catid 605
check 4595 catid 3625
check 4596 catid 8847
check 4597 catid 1

check 4927 catid 1514
check 4928 catid 3194
check 4929 catid 8699
check 4930 catid 4880
check 4931 catid 2777
check 4932 catid 1799
check 4933 catid 308
check 4934 catid 2134
check 4935 catid 6658
check 4936 catid 1889
check 4937 catid 6783
check 4938 catid 5513
check 4939 catid 1661
check 4940 catid 7634
check 4941 catid 5772
check 4942 catid 5490
check 4943 catid 10
check 4944 catid 6432
check 4945 catid 7146
check 4946 catid 4452
check 4947 catid 853
check 4948 catid 6633
check 4949 catid 6980
check 4950 catid 5779
check 4951 catid 6621
check 4952 catid 446
check 4953 catid 9259
check 4954 catid 8294
check 4955 catid 847
check 4956 catid 5623
check 4957 catid 6380
check 4958 catid 6332
check 4959 catid 6554
check 4960 catid 3861
check 4961 catid 4027
check 4962 catid 7252
check 4963 catid 2428
check 4964 catid 3649
check 4965 catid 1279
check 4966 catid 275
check 4967 catid 4353
check 4968 catid 7301
check 4969 catid 698
check 4970 catid 3006
check 4971 catid 5993
check 4972 catid 7

check 5302 catid 142
check 5303 catid 5081
check 5304 catid 1012
check 5305 catid 5208
check 5306 catid 8938
check 5307 catid 5244
check 5308 catid 8048
check 5309 catid 2228
check 5310 catid 2037
check 5311 catid 4169
check 5312 catid 2967
check 5313 catid 8817
check 5314 catid 4695
check 5315 catid 5247
check 5316 catid 8643
check 5317 catid 4966
check 5318 catid 8455
check 5319 catid 7448
check 5320 catid 434
check 5321 catid 1934
check 5322 catid 8403
check 5323 catid 6773
check 5324 catid 5654
check 5325 catid 298
check 5326 catid 4261
check 5327 catid 8888
check 5328 catid 2510
check 5329 catid 3660
check 5330 catid 9079
check 5331 catid 374
check 5332 catid 4568
check 5333 catid 7131
check 5334 catid 4852
check 5335 catid 636
check 5336 catid 5583
check 5337 catid 8429
check 5338 catid 2755
check 5339 catid 4071
check 5340 catid 740
check 5341 catid 3209
check 5342 catid 3496
check 5343 catid 3183
check 5344 catid 6159
check 5345 catid 7067
check 5346 catid 8025
check 5347 catid

check 5677 catid 3284
check 5678 catid 6311
check 5679 catid 3791
check 5680 catid 9165
check 5681 catid 3762
check 5682 catid 4728
check 5683 catid 5169
check 5684 catid 8319
check 5685 catid 4619
check 5686 catid 4140
check 5687 catid 2385
check 5688 catid 349
check 5689 catid 38
check 5690 catid 1622
check 5691 catid 4727
check 5692 catid 102
check 5693 catid 496
check 5694 catid 3486
check 5695 catid 5065
check 5696 catid 6510
check 5697 catid 2647
check 5698 catid 938
check 5699 catid 9248
check 5700 catid 6143
check 5701 catid 2527
check 5702 catid 5344
check 5703 catid 7761
check 5704 catid 4338
check 5705 catid 1238
check 5706 catid 7738
check 5707 catid 811
check 5708 catid 3413
check 5709 catid 2945
check 5710 catid 6694
check 5711 catid 5126
check 5712 catid 6895
check 5713 catid 5290
check 5714 catid 132
check 5715 catid 3569
check 5716 catid 5527
check 5717 catid 1064
check 5718 catid 1094
check 5719 catid 4711
check 5720 catid 1676
check 5721 catid 8470
check 5722 catid 6

check 6051 catid 5712
check 6052 catid 5649
check 6053 catid 8330
check 6054 catid 8172
check 6055 catid 4466
check 6056 catid 4601
check 6057 catid 6600
check 6058 catid 6893
check 6059 catid 7786
check 6060 catid 6395
check 6061 catid 8360
check 6062 catid 4134
check 6063 catid 5015
check 6064 catid 4049
check 6065 catid 1595
check 6066 catid 7496
check 6067 catid 7660
check 6068 catid 8371
check 6069 catid 2284
check 6070 catid 3563
check 6071 catid 7082
check 6072 catid 3758
check 6073 catid 4898
check 6074 catid 4470
check 6075 catid 5070
check 6076 catid 5111
check 6077 catid 1318
check 6078 catid 6874
check 6079 catid 330
check 6080 catid 8765
check 6081 catid 7279
check 6082 catid 4521
check 6083 catid 4294
check 6084 catid 8324
check 6085 catid 3583
check 6086 catid 6146
check 6087 catid 5981
check 6088 catid 278
check 6089 catid 6085
check 6090 catid 7265
check 6091 catid 2950
check 6092 catid 367
check 6093 catid 5273
check 6094 catid 5859
check 6095 catid 9202
check 6096 ca

check 6426 catid 6849
check 6427 catid 6306
check 6428 catid 471
check 6429 catid 8257
check 6430 catid 6915
check 6431 catid 5990
check 6432 catid 8053
check 6433 catid 9232
check 6434 catid 6347
check 6435 catid 8390
check 6436 catid 5751
check 6437 catid 398
check 6438 catid 8505
check 6439 catid 7865
check 6440 catid 3997
check 6441 catid 2079
check 6442 catid 4860
check 6443 catid 4819
check 6444 catid 2556
check 6445 catid 4131
check 6446 catid 4178
check 6447 catid 8548
check 6448 catid 5173
check 6449 catid 4322
check 6450 catid 7430
check 6451 catid 3515
check 6452 catid 6008
check 6453 catid 1890
check 6454 catid 7924
check 6455 catid 2082
check 6456 catid 8732
check 6457 catid 9019
check 6458 catid 7775
check 6459 catid 1317
check 6460 catid 7774
check 6461 catid 182
check 6462 catid 8894
check 6463 catid 1494
check 6464 catid 5754
check 6465 catid 3353
check 6466 catid 1266
check 6467 catid 7036
check 6468 catid 779
check 6469 catid 3074
check 6470 catid 5512
check 6471 cat

check 6801 catid 6441
check 6802 catid 1513
check 6803 catid 2027
check 6804 catid 1579
check 6805 catid 8939
check 6806 catid 22
check 6807 catid 3827
check 6808 catid 8676
check 6809 catid 9012
check 6810 catid 711
check 6811 catid 8056
check 6812 catid 1111
check 6813 catid 1101
check 6814 catid 1673
check 6815 catid 3852
check 6816 catid 3025
check 6817 catid 5892
check 6818 catid 7405
check 6819 catid 945
check 6820 catid 3320
check 6821 catid 706
check 6822 catid 1010
check 6823 catid 4216
check 6824 catid 3298
check 6825 catid 6522
check 6826 catid 2801
check 6827 catid 1993
check 6828 catid 3401
check 6829 catid 7215
check 6830 catid 4487
check 6831 catid 6356
check 6832 catid 625
check 6833 catid 3017
check 6834 catid 774
check 6835 catid 9278
check 6836 catid 4544
check 6837 catid 3410
check 6838 catid 6390
check 6839 catid 679
check 6840 catid 5427
check 6841 catid 1931
check 6842 catid 1978
check 6843 catid 6617
check 6844 catid 6082
check 6845 catid 7452
check 6846 catid 4

check 7176 catid 1125
check 7177 catid 3716
check 7178 catid 4465
check 7179 catid 3518
check 7180 catid 2318
check 7181 catid 4995
check 7182 catid 1410
check 7183 catid 6885
check 7184 catid 9140
check 7185 catid 1526
check 7186 catid 3428
check 7187 catid 7670
check 7188 catid 319
check 7189 catid 5868
check 7190 catid 7730
check 7191 catid 7797
check 7192 catid 3599
check 7193 catid 17
check 7194 catid 687
check 7195 catid 1779
check 7196 catid 2884
check 7197 catid 3462
check 7198 catid 2418
check 7199 catid 4817
check 7200 catid 1270
check 7201 catid 2394
check 7202 catid 8223
check 7203 catid 207
check 7204 catid 2828
check 7205 catid 2378
check 7206 catid 8651
check 7207 catid 2656
check 7208 catid 7507
check 7209 catid 6170
check 7210 catid 843
check 7211 catid 3368
check 7212 catid 5742
check 7213 catid 4921
check 7214 catid 1895
check 7215 catid 2015
check 7216 catid 2547
check 7217 catid 4279
check 7218 catid 5622
check 7219 catid 1973
check 7220 catid 6313
check 7221 catid

check 7550 catid 3127
check 7551 catid 3814
check 7552 catid 1363
check 7553 catid 3244
check 7554 catid 7753
check 7555 catid 1604
check 7556 catid 2941
check 7557 catid 4959
check 7558 catid 8239
check 7559 catid 8476
check 7560 catid 5453
check 7561 catid 5525
check 7562 catid 46
check 7563 catid 1384
check 7564 catid 392
check 7565 catid 4618
check 7566 catid 3084
check 7567 catid 873
check 7568 catid 704
check 7569 catid 3619
check 7570 catid 6841
check 7571 catid 1127
check 7572 catid 4882
check 7573 catid 3055
check 7574 catid 5190
check 7575 catid 4858
check 7576 catid 122
check 7577 catid 2404
check 7578 catid 3812
check 7579 catid 7931
check 7580 catid 5792
check 7581 catid 6644
check 7582 catid 5937
check 7583 catid 1861
check 7584 catid 2825
check 7585 catid 2562
check 7586 catid 5048
check 7587 catid 4495
check 7588 catid 7881
check 7589 catid 7034
check 7590 catid 8031
check 7591 catid 1460
check 7592 catid 3210
check 7593 catid 2599
check 7594 catid 6066
check 7595 catid

In [None]:
# Hold out part of the data; random_state fixes the shuffle for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    df['tweets'],
    df['userID'],
    random_state=0,
)

# Step 1: raw token counts learned from the training tweets only.
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)

# Step 2: re-weight the counts with tf-idf.
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

# Step 3: fit the classifier on the tf-idf features.
# clf = MultinomialNB().fit(X_train_tfidf, y_train)
clf = LinearSVC()
clf.fit(X_train_tfidf, y_train)

In [None]:
# clf was fitted on tf-idf weighted features (X_train_tfidf), so new text must
# pass through the same two fitted steps - counts, then tf-idf - before
# prediction. Feeding raw counts would score the tweet on a different scale
# from the one the model was trained on.
new_tweets = ["RT @handle: Director of Global Brand Marketing, Hotels and Casino's $125k + 30% bonus - Orlando Fl http://bit.ly/4kUmBB #jobs #twitjobs"]
new_counts = count_vect.transform(new_tweets)
new_tfidf = tfidf_transformer.transform(new_counts)
print(clf.predict(new_tfidf))